xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision aed4548f7b57b3f3898d52ba71a524e0765402ff)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2af0996ceSBarry Smith #include <petsc/private/vecimpl.h>
397929ea7SJunchao Zhang #include <petsc/private/sfimpl.h>
4af0996ceSBarry Smith #include <petsc/private/isimpl.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
60c312b8eSJed Brown #include <petscsf.h>
7bc8e477aSFande Kong #include <petsc/private/hashmapi.h>
88a729477SBarry Smith 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
2901bebe75SBarry Smith 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
4601bebe75SBarry Smith 
47b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48f74ef234SStefano Zampini {
49f74ef234SStefano Zampini   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50f74ef234SStefano Zampini 
51f74ef234SStefano Zampini   PetscFunctionBegin;
52f74ef234SStefano Zampini #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53b470e4b4SRichard Tran Mills   A->boundtocpu = flg;
54f74ef234SStefano Zampini #endif
55f74ef234SStefano Zampini   if (a->A) {
569566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(a->A,flg));
57f74ef234SStefano Zampini   }
58f74ef234SStefano Zampini   if (a->B) {
599566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(a->B,flg));
60f74ef234SStefano Zampini   }
613120d049SRichard Tran Mills 
623120d049SRichard Tran Mills   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
633120d049SRichard Tran Mills    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
643120d049SRichard Tran Mills    * to differ from the parent matrix. */
653120d049SRichard Tran Mills   if (a->lvec) {
669566063dSJacob Faibussowitsch     PetscCall(VecBindToCPU(a->lvec,flg));
673120d049SRichard Tran Mills   }
683120d049SRichard Tran Mills   if (a->diag) {
699566063dSJacob Faibussowitsch     PetscCall(VecBindToCPU(a->diag,flg));
703120d049SRichard Tran Mills   }
713120d049SRichard Tran Mills 
72f74ef234SStefano Zampini   PetscFunctionReturn(0);
73f74ef234SStefano Zampini }
74f74ef234SStefano Zampini 
7546533700Sstefano_zampini PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
7626bda2c4Sstefano_zampini {
7726bda2c4Sstefano_zampini   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
7826bda2c4Sstefano_zampini 
7926bda2c4Sstefano_zampini   PetscFunctionBegin;
8046533700Sstefano_zampini   if (mat->A) {
819566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
829566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
8346533700Sstefano_zampini   }
8426bda2c4Sstefano_zampini   PetscFunctionReturn(0);
8526bda2c4Sstefano_zampini }
8626bda2c4Sstefano_zampini 
87f2c98031SJed Brown PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
8827d4218bSShri Abhyankar {
8927d4218bSShri Abhyankar   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
9027d4218bSShri Abhyankar   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
9127d4218bSShri Abhyankar   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
9227d4218bSShri Abhyankar   const PetscInt  *ia,*ib;
93ce496241SStefano Zampini   const MatScalar *aa,*bb,*aav,*bav;
9427d4218bSShri Abhyankar   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
9527d4218bSShri Abhyankar   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
9627d4218bSShri Abhyankar 
9727d4218bSShri Abhyankar   PetscFunctionBegin;
98f4259b30SLisandro Dalcin   *keptrows = NULL;
99ce496241SStefano Zampini 
10027d4218bSShri Abhyankar   ia   = a->i;
10127d4218bSShri Abhyankar   ib   = b->i;
1029566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
1039566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
10427d4218bSShri Abhyankar   for (i=0; i<m; i++) {
10527d4218bSShri Abhyankar     na = ia[i+1] - ia[i];
10627d4218bSShri Abhyankar     nb = ib[i+1] - ib[i];
10727d4218bSShri Abhyankar     if (!na && !nb) {
10827d4218bSShri Abhyankar       cnt++;
10927d4218bSShri Abhyankar       goto ok1;
11027d4218bSShri Abhyankar     }
111ce496241SStefano Zampini     aa = aav + ia[i];
11227d4218bSShri Abhyankar     for (j=0; j<na; j++) {
11327d4218bSShri Abhyankar       if (aa[j] != 0.0) goto ok1;
11427d4218bSShri Abhyankar     }
115ce496241SStefano Zampini     bb = bav + ib[i];
11627d4218bSShri Abhyankar     for (j=0; j <nb; j++) {
11727d4218bSShri Abhyankar       if (bb[j] != 0.0) goto ok1;
11827d4218bSShri Abhyankar     }
11927d4218bSShri Abhyankar     cnt++;
12027d4218bSShri Abhyankar ok1:;
12127d4218bSShri Abhyankar   }
1221c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
123ce496241SStefano Zampini   if (!n0rows) {
1249566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
1259566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
126ce496241SStefano Zampini     PetscFunctionReturn(0);
127ce496241SStefano Zampini   }
1289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
12927d4218bSShri Abhyankar   cnt  = 0;
13027d4218bSShri Abhyankar   for (i=0; i<m; i++) {
13127d4218bSShri Abhyankar     na = ia[i+1] - ia[i];
13227d4218bSShri Abhyankar     nb = ib[i+1] - ib[i];
13327d4218bSShri Abhyankar     if (!na && !nb) continue;
134ce496241SStefano Zampini     aa = aav + ia[i];
13527d4218bSShri Abhyankar     for (j=0; j<na;j++) {
13627d4218bSShri Abhyankar       if (aa[j] != 0.0) {
13727d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
13827d4218bSShri Abhyankar         goto ok2;
13927d4218bSShri Abhyankar       }
14027d4218bSShri Abhyankar     }
141ce496241SStefano Zampini     bb = bav + ib[i];
14227d4218bSShri Abhyankar     for (j=0; j<nb; j++) {
14327d4218bSShri Abhyankar       if (bb[j] != 0.0) {
14427d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
14527d4218bSShri Abhyankar         goto ok2;
14627d4218bSShri Abhyankar       }
14727d4218bSShri Abhyankar     }
14827d4218bSShri Abhyankar ok2:;
14927d4218bSShri Abhyankar   }
1509566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
1519566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
1529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
15327d4218bSShri Abhyankar   PetscFunctionReturn(0);
15427d4218bSShri Abhyankar }
15527d4218bSShri Abhyankar 
15699e65526SBarry Smith PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
15799e65526SBarry Smith {
15899e65526SBarry Smith   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
15994342113SStefano Zampini   PetscBool         cong;
16099e65526SBarry Smith 
16199e65526SBarry Smith   PetscFunctionBegin;
1629566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(Y,&cong));
16394342113SStefano Zampini   if (Y->assembled && cong) {
1649566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(aij->A,D,is));
16599e65526SBarry Smith   } else {
1669566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet_Default(Y,D,is));
16799e65526SBarry Smith   }
16899e65526SBarry Smith   PetscFunctionReturn(0);
16999e65526SBarry Smith }
17099e65526SBarry Smith 
171f1f41ecbSJed Brown PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172f1f41ecbSJed Brown {
173f1f41ecbSJed Brown   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174f1f41ecbSJed Brown   PetscInt       i,rstart,nrows,*rows;
175f1f41ecbSJed Brown 
176f1f41ecbSJed Brown   PetscFunctionBegin;
1770298fd71SBarry Smith   *zrows = NULL;
1789566063dSJacob Faibussowitsch   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
1799566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
180f1f41ecbSJed Brown   for (i=0; i<nrows; i++) rows[i] += rstart;
1819566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182f1f41ecbSJed Brown   PetscFunctionReturn(0);
183f1f41ecbSJed Brown }
184f1f41ecbSJed Brown 
185857cbf51SRichard Tran Mills PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
1860716a85fSBarry Smith {
1870716a85fSBarry Smith   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
188a873a8cdSSam Reynolds   PetscInt          i,m,n,*garray = aij->garray;
1890716a85fSBarry Smith   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
1900716a85fSBarry Smith   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
1910716a85fSBarry Smith   PetscReal         *work;
192ce496241SStefano Zampini   const PetscScalar *dummy;
1930716a85fSBarry Smith 
1940716a85fSBarry Smith   PetscFunctionBegin;
1959566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A,&m,&n));
1969566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n,&work));
1979566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
1989566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
1999566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
2009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
201857cbf51SRichard Tran Mills   if (type == NORM_2) {
2020716a85fSBarry Smith     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
2030716a85fSBarry Smith       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
2040716a85fSBarry Smith     }
2050716a85fSBarry Smith     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
2060716a85fSBarry Smith       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
2070716a85fSBarry Smith     }
208857cbf51SRichard Tran Mills   } else if (type == NORM_1) {
2090716a85fSBarry Smith     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
2100716a85fSBarry Smith       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
2110716a85fSBarry Smith     }
2120716a85fSBarry Smith     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
2130716a85fSBarry Smith       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
2140716a85fSBarry Smith     }
215857cbf51SRichard Tran Mills   } else if (type == NORM_INFINITY) {
2160716a85fSBarry Smith     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
2170716a85fSBarry Smith       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
2180716a85fSBarry Smith     }
2190716a85fSBarry Smith     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
2200716a85fSBarry Smith       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
2210716a85fSBarry Smith     }
222857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
223a873a8cdSSam Reynolds     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224857cbf51SRichard Tran Mills       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
225a873a8cdSSam Reynolds     }
226a873a8cdSSam Reynolds     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227857cbf51SRichard Tran Mills       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
228a873a8cdSSam Reynolds     }
229857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
230857cbf51SRichard Tran Mills     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231857cbf51SRichard Tran Mills       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
232857cbf51SRichard Tran Mills     }
233857cbf51SRichard Tran Mills     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234857cbf51SRichard Tran Mills       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
235857cbf51SRichard Tran Mills     }
236857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
237857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
2381c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
2390716a85fSBarry Smith   } else {
2401c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
2410716a85fSBarry Smith   }
2429566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
243857cbf51SRichard Tran Mills   if (type == NORM_2) {
244a873a8cdSSam Reynolds     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
245857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
246a873a8cdSSam Reynolds     for (i=0; i<n; i++) reductions[i] /= m;
2470716a85fSBarry Smith   }
2480716a85fSBarry Smith   PetscFunctionReturn(0);
2490716a85fSBarry Smith }
2500716a85fSBarry Smith 
251e52d2c62SBarry Smith PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252e52d2c62SBarry Smith {
253e52d2c62SBarry Smith   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254e52d2c62SBarry Smith   IS              sis,gis;
255e52d2c62SBarry Smith   const PetscInt  *isis,*igis;
256e52d2c62SBarry Smith   PetscInt        n,*iis,nsis,ngis,rstart,i;
257e52d2c62SBarry Smith 
258e52d2c62SBarry Smith   PetscFunctionBegin;
2599566063dSJacob Faibussowitsch   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
2609566063dSJacob Faibussowitsch   PetscCall(MatFindNonzeroRows(a->B,&gis));
2619566063dSJacob Faibussowitsch   PetscCall(ISGetSize(gis,&ngis));
2629566063dSJacob Faibussowitsch   PetscCall(ISGetSize(sis,&nsis));
2639566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(sis,&isis));
2649566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(gis,&igis));
265e52d2c62SBarry Smith 
2669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ngis+nsis,&iis));
2679566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis,igis,ngis));
2689566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
269e52d2c62SBarry Smith   n    = ngis + nsis;
2709566063dSJacob Faibussowitsch   PetscCall(PetscSortRemoveDupsInt(&n,iis));
2719566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
272e52d2c62SBarry Smith   for (i=0; i<n; i++) iis[i] += rstart;
2739566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274e52d2c62SBarry Smith 
2759566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(sis,&isis));
2769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(gis,&igis));
2779566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&sis));
2789566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&gis));
279e52d2c62SBarry Smith   PetscFunctionReturn(0);
280e52d2c62SBarry Smith }
281e52d2c62SBarry Smith 
282dd6ea824SBarry Smith /*
2830f5bd95cSBarry Smith   Local utility routine that creates a mapping from the global column
2849e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
2850f5bd95cSBarry Smith storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
2860f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor
28772fa4726SStefano Zampini has an order N integer array but is fast to access.
2889e25ed09SBarry Smith */
289ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
2909e25ed09SBarry Smith {
29144a69424SLois Curfman McInnes   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
292d0f46423SBarry Smith   PetscInt       n = aij->B->cmap->n,i;
293dbb450caSBarry Smith 
2943a40ed3dSBarry Smith   PetscFunctionBegin;
29508401ef6SPierre Jolivet   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
296aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2979566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
298b1fc9764SSatish Balay   for (i=0; i<n; i++) {
2999566063dSJacob Faibussowitsch     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
300b1fc9764SSatish Balay   }
301b1fc9764SSatish Balay #else
3029566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
3039566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
304905e6a2fSBarry Smith   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
305b1fc9764SSatish Balay #endif
3063a40ed3dSBarry Smith   PetscFunctionReturn(0);
3079e25ed09SBarry Smith }
3089e25ed09SBarry Smith 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into the current row of the diagonal block.

  Arguments:
+ row,col   - local row and column of the entry
. value     - the value
. addv      - ADD_VALUES adds to an existing entry, anything else overwrites it
- orow,ocol - global row/column, used only in the error message

  The macro relies on variables of the expanding function: A, a, am, aa, ai, aj, aimax, ailen,
  rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, N, nonew and ignorezeroentries.
  The search window [low1,high1) is carried over between expansions: only one of its ends is reset,
  depending on how col compares with the previously handled column.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    /* reset one end of the search window based on the previously handled column */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* bisect while more than five candidates remain */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    /* linear scan of what is left; an existing entry is updated in place */ \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* it is unclear whether logging the flop here slows the code down */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* new location: off-diagonal zeros may be dropped, nonew == 1 drops silently, nonew == -1 is an error */ \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row, then store the new one at position _i */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }
3450a198c4cSBarry Smith 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into the current row of the off-diagonal block.

  Arguments:
+ row,col   - local row and column of the entry in the off-diagonal block
. value     - the value
. addv      - ADD_VALUES adds to an existing entry, anything else overwrites it
- orow,ocol - global row/column, used only in the error message

  The macro relies on variables of the expanding function: B, b, bm, ba, bi, bj, bimax, bilen,
  rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, N, nonew and ignorezeroentries.
  Unlike the diagonal-block variant, a zero value for a new location is dropped whenever
  ignorezeroentries is set, with no exception for row == col.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    /* reset one end of the search window based on the previously handled column */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* bisect while more than five candidates remain */ \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    /* linear scan of what is left; an existing entry is updated in place */ \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* new location: zeros may be dropped, nonew == 1 drops silently, nonew == -1 is an error */ \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row, then store the new one at position _i */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
38130770e4dSSatish Balay 
3822fd7e33dSBarry Smith PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
3832fd7e33dSBarry Smith {
3842fd7e33dSBarry Smith   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
3852fd7e33dSBarry Smith   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
3862fd7e33dSBarry Smith   PetscInt       l,*garray = mat->garray,diag;
387fff043a9SJunchao Zhang   PetscScalar    *aa,*ba;
3882fd7e33dSBarry Smith 
3892fd7e33dSBarry Smith   PetscFunctionBegin;
3902fd7e33dSBarry Smith   /* code only works for square matrices A */
3912fd7e33dSBarry Smith 
3922fd7e33dSBarry Smith   /* find size of row to the left of the diagonal part */
3939566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
3942fd7e33dSBarry Smith   row  = row - diag;
3952fd7e33dSBarry Smith   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
3962fd7e33dSBarry Smith     if (garray[b->j[b->i[row]+l]] > diag) break;
3972fd7e33dSBarry Smith   }
398fff043a9SJunchao Zhang   if (l) {
3999566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
4009566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
4019566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
402fff043a9SJunchao Zhang   }
4032fd7e33dSBarry Smith 
4042fd7e33dSBarry Smith   /* diagonal part */
405fff043a9SJunchao Zhang   if (a->i[row+1]-a->i[row]) {
4069566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
4079566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
4089566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
409fff043a9SJunchao Zhang   }
4102fd7e33dSBarry Smith 
4112fd7e33dSBarry Smith   /* right of diagonal part */
412fff043a9SJunchao Zhang   if (b->i[row+1]-b->i[row]-l) {
4139566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
4149566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
4159566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416fff043a9SJunchao Zhang   }
4172fd7e33dSBarry Smith   PetscFunctionReturn(0);
4182fd7e33dSBarry Smith }
4192fd7e33dSBarry Smith 
420b1d57f15SBarry Smith PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4218a729477SBarry Smith {
42244a69424SLois Curfman McInnes   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423071fcb05SBarry Smith   PetscScalar    value = 0.0;
424d0f46423SBarry Smith   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425d0f46423SBarry Smith   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426ace3abfcSBarry Smith   PetscBool      roworiented = aij->roworiented;
4278a729477SBarry Smith 
4280520107fSSatish Balay   /* Some Variables required in the macro */
4294ee7247eSSatish Balay   Mat        A                    = aij->A;
4304ee7247eSSatish Balay   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
43157809a77SBarry Smith   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432ace3abfcSBarry Smith   PetscBool  ignorezeroentries    = a->ignorezeroentries;
43330770e4dSSatish Balay   Mat        B                    = aij->B;
43430770e4dSSatish Balay   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435d0f46423SBarry Smith   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436ce496241SStefano Zampini   MatScalar  *aa,*ba;
437fd3458f5SBarry Smith   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4388d76821aSHong Zhang   PetscInt   nonew;
439a77337e4SBarry Smith   MatScalar  *ap1,*ap2;
4404ee7247eSSatish Balay 
4413a40ed3dSBarry Smith   PetscFunctionBegin;
4429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A,&aa));
4439566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B,&ba));
4448a729477SBarry Smith   for (i=0; i<m; i++) {
4455ef9f2a5SBarry Smith     if (im[i] < 0) continue;
44608401ef6SPierre Jolivet     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
4474b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
4484b0e389bSBarry Smith       row      = im[i] - rstart;
449fd3458f5SBarry Smith       lastcol1 = -1;
450fd3458f5SBarry Smith       rp1      = aj + ai[row];
451fd3458f5SBarry Smith       ap1      = aa + ai[row];
452fd3458f5SBarry Smith       rmax1    = aimax[row];
453fd3458f5SBarry Smith       nrow1    = ailen[row];
454fd3458f5SBarry Smith       low1     = 0;
455fd3458f5SBarry Smith       high1    = nrow1;
456fd3458f5SBarry Smith       lastcol2 = -1;
457fd3458f5SBarry Smith       rp2      = bj + bi[row];
458d498b1e9SBarry Smith       ap2      = ba + bi[row];
459fd3458f5SBarry Smith       rmax2    = bimax[row];
460d498b1e9SBarry Smith       nrow2    = bilen[row];
461fd3458f5SBarry Smith       low2     = 0;
462fd3458f5SBarry Smith       high2    = nrow2;
463fd3458f5SBarry Smith 
4641eb62cbbSBarry Smith       for (j=0; j<n; j++) {
465071fcb05SBarry Smith         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466c80a64e6SBarry Smith         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467fd3458f5SBarry Smith         if (in[j] >= cstart && in[j] < cend) {
468fd3458f5SBarry Smith           col   = in[j] - cstart;
4698d76821aSHong Zhang           nonew = a->nonew;
470d40312a9SBarry Smith           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471273d9f13SBarry Smith         } else if (in[j] < 0) continue;
47208401ef6SPierre Jolivet         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
4731eb62cbbSBarry Smith         else {
474227d817aSBarry Smith           if (mat->was_assembled) {
475905e6a2fSBarry Smith             if (!aij->colmap) {
4769566063dSJacob Faibussowitsch               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
477905e6a2fSBarry Smith             }
478aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4799566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480fa46199cSSatish Balay             col--;
481b1fc9764SSatish Balay #else
482905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
483b1fc9764SSatish Balay #endif
484fff043a9SJunchao Zhang             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
4859566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
4864b0e389bSBarry Smith               col  =  in[j];
4879bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488f9508a3cSSatish Balay               B        = aij->B;
489f9508a3cSSatish Balay               b        = (Mat_SeqAIJ*)B->data;
490e44c0bd4SBarry Smith               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491d498b1e9SBarry Smith               rp2      = bj + bi[row];
492d498b1e9SBarry Smith               ap2      = ba + bi[row];
493d498b1e9SBarry Smith               rmax2    = bimax[row];
494d498b1e9SBarry Smith               nrow2    = bilen[row];
495d498b1e9SBarry Smith               low2     = 0;
496d498b1e9SBarry Smith               high2    = nrow2;
497d0f46423SBarry Smith               bm       = aij->B->rmap->n;
498f9508a3cSSatish Balay               ba       = b->a;
499d707bf6cSMatthew Knepley             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
5000587a0fcSBarry Smith               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
5019566063dSJacob Faibussowitsch                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
50298921bdaSJacob Faibussowitsch               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
5030587a0fcSBarry Smith             }
504c48de900SBarry Smith           } else col = in[j];
5058d76821aSHong Zhang           nonew = b->nonew;
506d40312a9SBarry Smith           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5071eb62cbbSBarry Smith         }
5081eb62cbbSBarry Smith       }
5095ef9f2a5SBarry Smith     } else {
51028b400f6SJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
51190f02eecSBarry Smith       if (!aij->donotstash) {
5125080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
513d36fbae8SSatish Balay         if (roworiented) {
5149566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515d36fbae8SSatish Balay         } else {
5169566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
5174b0e389bSBarry Smith         }
5181eb62cbbSBarry Smith       }
5198a729477SBarry Smith     }
52090f02eecSBarry Smith   }
5219566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(A,&aa));
5229566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B,&ba));
5233a40ed3dSBarry Smith   PetscFunctionReturn(0);
5248a729477SBarry Smith }
5258a729477SBarry Smith 
5262b08fdbeSandi selinger /*
527904d1e70Sandi selinger     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
5282b08fdbeSandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
529904d1e70Sandi selinger     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
5302b08fdbeSandi selinger */
531904d1e70Sandi selinger PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532904d1e70Sandi selinger {
533904d1e70Sandi selinger   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534904d1e70Sandi selinger   Mat            A           = aij->A; /* diagonal part of the matrix */
535904d1e70Sandi selinger   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536904d1e70Sandi selinger   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537904d1e70Sandi selinger   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538904d1e70Sandi selinger   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539904d1e70Sandi selinger   PetscInt       *ailen      = a->ilen,*aj = a->j;
540904d1e70Sandi selinger   PetscInt       *bilen      = b->ilen,*bj = b->j;
5416dc1ffa3Sandi selinger   PetscInt       am          = aij->A->rmap->n,j;
542904d1e70Sandi selinger   PetscInt       diag_so_far = 0,dnz;
543904d1e70Sandi selinger   PetscInt       offd_so_far = 0,onz;
544904d1e70Sandi selinger 
545904d1e70Sandi selinger   PetscFunctionBegin;
546904d1e70Sandi selinger   /* Iterate over all rows of the matrix */
547904d1e70Sandi selinger   for (j=0; j<am; j++) {
548904d1e70Sandi selinger     dnz = onz = 0;
549904d1e70Sandi selinger     /*  Iterate over all non-zero columns of the current row */
5506dc1ffa3Sandi selinger     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551904d1e70Sandi selinger       /* If column is in the diagonal */
552904d1e70Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553904d1e70Sandi selinger         aj[diag_so_far++] = mat_j[col] - cstart;
554904d1e70Sandi selinger         dnz++;
555904d1e70Sandi selinger       } else { /* off-diagonal entries */
556904d1e70Sandi selinger         bj[offd_so_far++] = mat_j[col];
557904d1e70Sandi selinger         onz++;
558904d1e70Sandi selinger       }
559904d1e70Sandi selinger     }
560904d1e70Sandi selinger     ailen[j] = dnz;
561904d1e70Sandi selinger     bilen[j] = onz;
562904d1e70Sandi selinger   }
563904d1e70Sandi selinger   PetscFunctionReturn(0);
564904d1e70Sandi selinger }
565904d1e70Sandi selinger 
566904d1e70Sandi selinger /*
567904d1e70Sandi selinger     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568904d1e70Sandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
5691de21080Sandi selinger     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
5701de21080Sandi selinger     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
5711de21080Sandi selinger     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572904d1e70Sandi selinger */
573e9ede7d0Sandi selinger PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
5743a063d27Sandi selinger {
5753a063d27Sandi selinger   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
5763a063d27Sandi selinger   Mat            A      = aij->A; /* diagonal part of the matrix */
5773a063d27Sandi selinger   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578e9ede7d0Sandi selinger   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
5793a063d27Sandi selinger   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
5803a063d27Sandi selinger   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
5813a063d27Sandi selinger   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
5823a063d27Sandi selinger   PetscInt       *ailen = a->ilen,*aj = a->j;
5833a063d27Sandi selinger   PetscInt       *bilen = b->ilen,*bj = b->j;
5846dc1ffa3Sandi selinger   PetscInt       am     = aij->A->rmap->n,j;
5851de21080Sandi selinger   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586904d1e70Sandi selinger   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587904d1e70Sandi selinger   PetscScalar    *aa = a->a,*ba = b->a;
5883a063d27Sandi selinger 
5893a063d27Sandi selinger   PetscFunctionBegin;
5903a063d27Sandi selinger   /* Iterate over all rows of the matrix */
5913a063d27Sandi selinger   for (j=0; j<am; j++) {
592904d1e70Sandi selinger     dnz_row = onz_row = 0;
593904d1e70Sandi selinger     rowstart_offd = full_offd_i[j];
594904d1e70Sandi selinger     rowstart_diag = full_diag_i[j];
595e9ede7d0Sandi selinger     /*  Iterate over all non-zero columns of the current row */
596e9ede7d0Sandi selinger     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597ae8e66a0Sandi selinger       /* If column is in the diagonal */
5983a063d27Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599904d1e70Sandi selinger         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600904d1e70Sandi selinger         aa[rowstart_diag+dnz_row] = mat_a[col];
601904d1e70Sandi selinger         dnz_row++;
602ae8e66a0Sandi selinger       } else { /* off-diagonal entries */
603904d1e70Sandi selinger         bj[rowstart_offd+onz_row] = mat_j[col];
604904d1e70Sandi selinger         ba[rowstart_offd+onz_row] = mat_a[col];
605904d1e70Sandi selinger         onz_row++;
6063a063d27Sandi selinger       }
6073a063d27Sandi selinger     }
608904d1e70Sandi selinger     ailen[j] = dnz_row;
609904d1e70Sandi selinger     bilen[j] = onz_row;
6103a063d27Sandi selinger   }
6113a063d27Sandi selinger   PetscFunctionReturn(0);
6123a063d27Sandi selinger }
6133a063d27Sandi selinger 
614b1d57f15SBarry Smith PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
615b49de8d1SLois Curfman McInnes {
616b49de8d1SLois Curfman McInnes   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
617d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
618d0f46423SBarry Smith   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
619b49de8d1SLois Curfman McInnes 
6203a40ed3dSBarry Smith   PetscFunctionBegin;
621b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
62254c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
62354c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
624b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
625b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
626b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
62754c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
62854c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
629b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend) {
630b49de8d1SLois Curfman McInnes           col  = idxn[j] - cstart;
6319566063dSJacob Faibussowitsch           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
632fa852ad4SSatish Balay         } else {
633905e6a2fSBarry Smith           if (!aij->colmap) {
6349566063dSJacob Faibussowitsch             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
635905e6a2fSBarry Smith           }
636aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6379566063dSJacob Faibussowitsch           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
638fa46199cSSatish Balay           col--;
639b1fc9764SSatish Balay #else
640905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
641b1fc9764SSatish Balay #endif
642e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
643d9d09a02SSatish Balay           else {
6449566063dSJacob Faibussowitsch             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
645b49de8d1SLois Curfman McInnes           }
646b49de8d1SLois Curfman McInnes         }
647b49de8d1SLois Curfman McInnes       }
648f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
649b49de8d1SLois Curfman McInnes   }
6503a40ed3dSBarry Smith   PetscFunctionReturn(0);
651b49de8d1SLois Curfman McInnes }
652bc5ccf88SSatish Balay 
653dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654bc5ccf88SSatish Balay {
655bc5ccf88SSatish Balay   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656b1d57f15SBarry Smith   PetscInt       nstash,reallocs;
657bc5ccf88SSatish Balay 
658bc5ccf88SSatish Balay   PetscFunctionBegin;
6592205254eSKarl Rupp   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660bc5ccf88SSatish Balay 
6619566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
6629566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
6639566063dSJacob Faibussowitsch   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664bc5ccf88SSatish Balay   PetscFunctionReturn(0);
665bc5ccf88SSatish Balay }
666bc5ccf88SSatish Balay 
667dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668bc5ccf88SSatish Balay {
669bc5ccf88SSatish Balay   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670b1d57f15SBarry Smith   PetscMPIInt    n;
671b1d57f15SBarry Smith   PetscInt       i,j,rstart,ncols,flg;
672e44c0bd4SBarry Smith   PetscInt       *row,*col;
673ace3abfcSBarry Smith   PetscBool      other_disassembled;
67487828ca2SBarry Smith   PetscScalar    *val;
675bc5ccf88SSatish Balay 
67691c97fd4SSatish Balay   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
6776e111a19SKarl Rupp 
678bc5ccf88SSatish Balay   PetscFunctionBegin;
6794cb17eb5SBarry Smith   if (!aij->donotstash && !mat->nooffprocentries) {
680a2d1c673SSatish Balay     while (1) {
6819566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
682a2d1c673SSatish Balay       if (!flg) break;
683a2d1c673SSatish Balay 
684bc5ccf88SSatish Balay       for (i=0; i<n;) {
685bc5ccf88SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
6862205254eSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
6872205254eSKarl Rupp           if (row[j] != rstart) break;
6882205254eSKarl Rupp         }
689bc5ccf88SSatish Balay         if (j < n) ncols = j-i;
690bc5ccf88SSatish Balay         else       ncols = n-i;
691bc5ccf88SSatish Balay         /* Now assemble all these values with a single function call */
6929566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
693bc5ccf88SSatish Balay         i    = j;
694bc5ccf88SSatish Balay       }
695bc5ccf88SSatish Balay     }
6969566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
697bc5ccf88SSatish Balay   }
6988c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
699c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
7009ecce9b1SRichard Tran Mills   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
7019ecce9b1SRichard Tran Mills   if (mat->boundtocpu) {
7029566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
7039566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
7049ecce9b1SRichard Tran Mills   }
705e2cf4d64SStefano Zampini #endif
7069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->A,mode));
7079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->A,mode));
708bc5ccf88SSatish Balay 
709bc5ccf88SSatish Balay   /* determine if any processor has disassembled, if so we must
710071fcb05SBarry Smith      also disassemble ourself, in order that we may reassemble. */
711bc5ccf88SSatish Balay   /*
712bc5ccf88SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
713bc5ccf88SSatish Balay      no processor disassembled thus we can skip this stuff
714bc5ccf88SSatish Balay   */
715bc5ccf88SSatish Balay   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
7161c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
717fff043a9SJunchao Zhang     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
7189566063dSJacob Faibussowitsch       PetscCall(MatDisAssemble_MPIAIJ(mat));
719ad59fb31SSatish Balay     }
720ad59fb31SSatish Balay   }
721bc5ccf88SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
7229566063dSJacob Faibussowitsch     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
723bc5ccf88SSatish Balay   }
7249566063dSJacob Faibussowitsch   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
7258c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
726c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
727e2cf4d64SStefano Zampini #endif
7289566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->B,mode));
7299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->B,mode));
730bc5ccf88SSatish Balay 
7319566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
7322205254eSKarl Rupp 
733f4259b30SLisandro Dalcin   aij->rowvalues = NULL;
734a30b2313SHong Zhang 
7359566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->diag));
736e56f5c9eSBarry Smith 
7374f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
7384f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
739e56f5c9eSBarry Smith     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
7401c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
741e56f5c9eSBarry Smith   }
7428c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
743c70f7ee4SJunchao Zhang   mat->offloadmask = PETSC_OFFLOAD_BOTH;
744e2cf4d64SStefano Zampini #endif
745bc5ccf88SSatish Balay   PetscFunctionReturn(0);
746bc5ccf88SSatish Balay }
747bc5ccf88SSatish Balay 
748dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
7491eb62cbbSBarry Smith {
75044a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
7513a40ed3dSBarry Smith 
7523a40ed3dSBarry Smith   PetscFunctionBegin;
7539566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
7549566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
7553a40ed3dSBarry Smith   PetscFunctionReturn(0);
7561eb62cbbSBarry Smith }
7571eb62cbbSBarry Smith 
7582b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
7591eb62cbbSBarry Smith {
7601b1dd7adSMatthew G. Knepley   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
761a92ad425SStefano Zampini   PetscObjectState sA, sB;
7621b1dd7adSMatthew G. Knepley   PetscInt        *lrows;
7636e520ac8SStefano Zampini   PetscInt         r, len;
764a92ad425SStefano Zampini   PetscBool        cong, lch, gch;
7651eb62cbbSBarry Smith 
7663a40ed3dSBarry Smith   PetscFunctionBegin;
7676e520ac8SStefano Zampini   /* get locally owned rows */
7689566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
7699566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A,&cong));
77097b48c8fSBarry Smith   /* fix right hand side if needed */
77197b48c8fSBarry Smith   if (x && b) {
7721b1dd7adSMatthew G. Knepley     const PetscScalar *xx;
7731b1dd7adSMatthew G. Knepley     PetscScalar       *bb;
7741b1dd7adSMatthew G. Knepley 
77528b400f6SJacob Faibussowitsch     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
7769566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
7779566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
7781b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
7799566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
7809566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
78197b48c8fSBarry Smith   }
782a92ad425SStefano Zampini 
783a92ad425SStefano Zampini   sA = mat->A->nonzerostate;
784a92ad425SStefano Zampini   sB = mat->B->nonzerostate;
785a92ad425SStefano Zampini 
786a92ad425SStefano Zampini   if (diag != 0.0 && cong) {
7879566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
7889566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
789a92ad425SStefano Zampini   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
790a92ad425SStefano Zampini     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
791a92ad425SStefano Zampini     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
792a92ad425SStefano Zampini     PetscInt   nnwA, nnwB;
793a92ad425SStefano Zampini     PetscBool  nnzA, nnzB;
794a92ad425SStefano Zampini 
795a92ad425SStefano Zampini     nnwA = aijA->nonew;
796a92ad425SStefano Zampini     nnwB = aijB->nonew;
797a92ad425SStefano Zampini     nnzA = aijA->keepnonzeropattern;
798a92ad425SStefano Zampini     nnzB = aijB->keepnonzeropattern;
799a92ad425SStefano Zampini     if (!nnzA) {
8009566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
801a92ad425SStefano Zampini       aijA->nonew = 0;
802a92ad425SStefano Zampini     }
803a92ad425SStefano Zampini     if (!nnzB) {
8049566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
805a92ad425SStefano Zampini       aijB->nonew = 0;
806a92ad425SStefano Zampini     }
807a92ad425SStefano Zampini     /* Must zero here before the next loop */
8089566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
8099566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
8101b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) {
8111b1dd7adSMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
812a92ad425SStefano Zampini       if (row >= A->cmap->N) continue;
8139566063dSJacob Faibussowitsch       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
814e2d53e46SBarry Smith     }
815a92ad425SStefano Zampini     aijA->nonew = nnwA;
816a92ad425SStefano Zampini     aijB->nonew = nnwB;
8176eb55b6aSBarry Smith   } else {
8189566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
8199566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
8206eb55b6aSBarry Smith   }
8219566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
8229566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
8239566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
8244f9cfa9eSBarry Smith 
825a92ad425SStefano Zampini   /* reduce nonzerostate */
826a92ad425SStefano Zampini   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
8271c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
828a92ad425SStefano Zampini   if (gch) A->nonzerostate++;
8293a40ed3dSBarry Smith   PetscFunctionReturn(0);
8301eb62cbbSBarry Smith }
8311eb62cbbSBarry Smith 
8329c7c4993SBarry Smith PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
8339c7c4993SBarry Smith {
8349c7c4993SBarry Smith   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
8355ba17502SJed Brown   PetscMPIInt       n = A->rmap->n;
836131c27b5Sprj-   PetscInt          i,j,r,m,len = 0;
83754bd4135SMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
838131c27b5Sprj-   PetscMPIInt       p = 0;
83954bd4135SMatthew G. Knepley   PetscSFNode       *rrows;
84054bd4135SMatthew G. Knepley   PetscSF           sf;
8419c7c4993SBarry Smith   const PetscScalar *xx;
842fff043a9SJunchao Zhang   PetscScalar       *bb,*mask,*aij_a;
843564f14d6SBarry Smith   Vec               xmask,lmask;
844564f14d6SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
845564f14d6SBarry Smith   const PetscInt    *aj, *ii,*ridx;
846564f14d6SBarry Smith   PetscScalar       *aa;
8479c7c4993SBarry Smith 
8489c7c4993SBarry Smith   PetscFunctionBegin;
84954bd4135SMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
8509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lrows));
85154bd4135SMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
8529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &rrows));
85354bd4135SMatthew G. Knepley   for (r = 0; r < N; ++r) {
85454bd4135SMatthew G. Knepley     const PetscInt idx   = rows[r];
855*aed4548fSBarry Smith     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
8565ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
8579566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
8585ba17502SJed Brown     }
85954bd4135SMatthew G. Knepley     rrows[r].rank  = p;
86054bd4135SMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
8619c7c4993SBarry Smith   }
8629566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
8639566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
86454bd4135SMatthew G. Knepley   /* Collect flags for rows to be zeroed */
8659566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
8669566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
8679566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
86854bd4135SMatthew G. Knepley   /* Compress and put in row numbers */
86954bd4135SMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
870564f14d6SBarry Smith   /* zero diagonal part of matrix */
8719566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
872564f14d6SBarry Smith   /* handle off diagonal part of matrix */
8739566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(A,&xmask,NULL));
8749566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(l->lvec,&lmask));
8759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xmask,&bb));
87654bd4135SMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
8779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xmask,&bb));
8789566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
8799566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
8809566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&xmask));
881a92ad425SStefano Zampini   if (x && b) { /* this code is buggy when the row and column layout don't match */
882a92ad425SStefano Zampini     PetscBool cong;
883a92ad425SStefano Zampini 
8849566063dSJacob Faibussowitsch     PetscCall(MatHasCongruentLayouts(A,&cong));
88528b400f6SJacob Faibussowitsch     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
8869566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
8879566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
8889566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(l->lvec,&xx));
8899566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b,&bb));
890377aa5a1SBarry Smith   }
8919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lmask,&mask));
892564f14d6SBarry Smith   /* remove zeroed rows of off diagonal matrix */
8939566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
894564f14d6SBarry Smith   ii = aij->i;
89554bd4135SMatthew G. Knepley   for (i=0; i<len; i++) {
8969566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
8979c7c4993SBarry Smith   }
898564f14d6SBarry Smith   /* loop over all elements of off process part of matrix zeroing removed columns*/
899564f14d6SBarry Smith   if (aij->compressedrow.use) {
900564f14d6SBarry Smith     m    = aij->compressedrow.nrows;
901564f14d6SBarry Smith     ii   = aij->compressedrow.i;
902564f14d6SBarry Smith     ridx = aij->compressedrow.rindex;
903564f14d6SBarry Smith     for (i=0; i<m; i++) {
904564f14d6SBarry Smith       n  = ii[i+1] - ii[i];
905564f14d6SBarry Smith       aj = aij->j + ii[i];
906fff043a9SJunchao Zhang       aa = aij_a + ii[i];
907564f14d6SBarry Smith 
908564f14d6SBarry Smith       for (j=0; j<n; j++) {
90925266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
910377aa5a1SBarry Smith           if (b) bb[*ridx] -= *aa*xx[*aj];
911564f14d6SBarry Smith           *aa = 0.0;
912564f14d6SBarry Smith         }
913564f14d6SBarry Smith         aa++;
914564f14d6SBarry Smith         aj++;
915564f14d6SBarry Smith       }
916564f14d6SBarry Smith       ridx++;
917564f14d6SBarry Smith     }
918564f14d6SBarry Smith   } else { /* do not use compressed row format */
919564f14d6SBarry Smith     m = l->B->rmap->n;
920564f14d6SBarry Smith     for (i=0; i<m; i++) {
921564f14d6SBarry Smith       n  = ii[i+1] - ii[i];
922564f14d6SBarry Smith       aj = aij->j + ii[i];
923fff043a9SJunchao Zhang       aa = aij_a + ii[i];
924564f14d6SBarry Smith       for (j=0; j<n; j++) {
92525266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
926377aa5a1SBarry Smith           if (b) bb[i] -= *aa*xx[*aj];
927564f14d6SBarry Smith           *aa = 0.0;
928564f14d6SBarry Smith         }
929564f14d6SBarry Smith         aa++;
930564f14d6SBarry Smith         aj++;
931564f14d6SBarry Smith       }
932564f14d6SBarry Smith     }
933564f14d6SBarry Smith   }
934a92ad425SStefano Zampini   if (x && b) {
9359566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b,&bb));
9369566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
937377aa5a1SBarry Smith   }
9389566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
9399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lmask,&mask));
9409566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lmask));
9419566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
9424f9cfa9eSBarry Smith 
9434f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
9444f9cfa9eSBarry Smith   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
9454f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
9461c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
9474f9cfa9eSBarry Smith   }
9489c7c4993SBarry Smith   PetscFunctionReturn(0);
9499c7c4993SBarry Smith }
9509c7c4993SBarry Smith 
951dfbe8321SBarry Smith PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
9521eb62cbbSBarry Smith {
953416022c9SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954b1d57f15SBarry Smith   PetscInt       nt;
95519b3b6edSHong Zhang   VecScatter     Mvctx = a->Mvctx;
956416022c9SBarry Smith 
9573a40ed3dSBarry Smith   PetscFunctionBegin;
9589566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(xx,&nt));
95908401ef6SPierre Jolivet   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
9609566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
9619566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
9629566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
9639566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
9643a40ed3dSBarry Smith   PetscFunctionReturn(0);
9651eb62cbbSBarry Smith }
9661eb62cbbSBarry Smith 
967bd0c2dcbSBarry Smith PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968bd0c2dcbSBarry Smith {
969bd0c2dcbSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970bd0c2dcbSBarry Smith 
971bd0c2dcbSBarry Smith   PetscFunctionBegin;
9729566063dSJacob Faibussowitsch   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
973bd0c2dcbSBarry Smith   PetscFunctionReturn(0);
974bd0c2dcbSBarry Smith }
975bd0c2dcbSBarry Smith 
976dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977da3a660dSBarry Smith {
978416022c9SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
97901ad2aeeSHong Zhang   VecScatter     Mvctx = a->Mvctx;
9803a40ed3dSBarry Smith 
9813a40ed3dSBarry Smith   PetscFunctionBegin;
9829566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
9839566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
9849566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
9859566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
9863a40ed3dSBarry Smith   PetscFunctionReturn(0);
987da3a660dSBarry Smith }
988da3a660dSBarry Smith 
989dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990da3a660dSBarry Smith {
991416022c9SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992da3a660dSBarry Smith 
9933a40ed3dSBarry Smith   PetscFunctionBegin;
994da3a660dSBarry Smith   /* do nondiagonal part */
9959566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996da3a660dSBarry Smith   /* do local part */
9979566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
9989613dc34SJunchao Zhang   /* add partial results together */
9999566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
10009566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
10013a40ed3dSBarry Smith   PetscFunctionReturn(0);
1002da3a660dSBarry Smith }
1003da3a660dSBarry Smith 
10047087cfbeSBarry Smith PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1005cd0d46ebSvictorle {
10064f423910Svictorle   MPI_Comm       comm;
1007cd0d46ebSvictorle   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
100866501d38Svictorle   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1009cd0d46ebSvictorle   IS             Me,Notme;
1010b1d57f15SBarry Smith   PetscInt       M,N,first,last,*notme,i;
101154d735aeSStefano Zampini   PetscBool      lf;
1012b1d57f15SBarry Smith   PetscMPIInt    size;
1013cd0d46ebSvictorle 
1014cd0d46ebSvictorle   PetscFunctionBegin;
101542e5f5b4Svictorle   /* Easy test: symmetric diagonal block */
101666501d38Svictorle   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
10179566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
10181c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1019cd0d46ebSvictorle   if (!*f) PetscFunctionReturn(0);
10209566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
10219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
1022b1d57f15SBarry Smith   if (size == 1) PetscFunctionReturn(0);
102342e5f5b4Svictorle 
10247dae84e0SHong Zhang   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
10259566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Amat,&M,&N));
10269566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
10279566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N-last+first,&notme));
1028cd0d46ebSvictorle   for (i=0; i<first; i++) notme[i] = i;
1029cd0d46ebSvictorle   for (i=last; i<M; i++) notme[i-last+first] = i;
10309566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
10319566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
10329566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
103366501d38Svictorle   Aoff = Aoffs[0];
10349566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
103566501d38Svictorle   Boff = Boffs[0];
10369566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
10379566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1,&Aoffs));
10389566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1,&Boffs));
10399566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Me));
10409566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Notme));
10419566063dSJacob Faibussowitsch   PetscCall(PetscFree(notme));
1042cd0d46ebSvictorle   PetscFunctionReturn(0);
1043cd0d46ebSvictorle }
1044cd0d46ebSvictorle 
1045a3bbdb47SHong Zhang PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046a3bbdb47SHong Zhang {
1047a3bbdb47SHong Zhang   PetscFunctionBegin;
10489566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049a3bbdb47SHong Zhang   PetscFunctionReturn(0);
1050a3bbdb47SHong Zhang }
1051a3bbdb47SHong Zhang 
1052dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053da3a660dSBarry Smith {
1054416022c9SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055da3a660dSBarry Smith 
10563a40ed3dSBarry Smith   PetscFunctionBegin;
1057da3a660dSBarry Smith   /* do nondiagonal part */
10589566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059da3a660dSBarry Smith   /* do local part */
10609566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
10619613dc34SJunchao Zhang   /* add partial results together */
10629566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
10639566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
10643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1065da3a660dSBarry Smith }
1066da3a660dSBarry Smith 
10671eb62cbbSBarry Smith /*
10681eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
10691eb62cbbSBarry Smith    diagonal block
10701eb62cbbSBarry Smith */
1071dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
10721eb62cbbSBarry Smith {
1073416022c9SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
10743a40ed3dSBarry Smith 
10753a40ed3dSBarry Smith   PetscFunctionBegin;
107608401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077*aed4548fSBarry Smith   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
10789566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A,v));
10793a40ed3dSBarry Smith   PetscFunctionReturn(0);
10801eb62cbbSBarry Smith }
10811eb62cbbSBarry Smith 
1082f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083052efed2SBarry Smith {
1084052efed2SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
10853a40ed3dSBarry Smith 
10863a40ed3dSBarry Smith   PetscFunctionBegin;
10879566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A,aa));
10889566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B,aa));
10893a40ed3dSBarry Smith   PetscFunctionReturn(0);
1090052efed2SBarry Smith }
1091052efed2SBarry Smith 
1092cbc6b225SStefano Zampini /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1093cbc6b225SStefano Zampini PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1094cbc6b225SStefano Zampini {
1095cbc6b225SStefano Zampini   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1096cbc6b225SStefano Zampini 
1097cbc6b225SStefano Zampini   PetscFunctionBegin;
10989566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&aij->coo_sf));
1099158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aperm1));
1100158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bperm1));
1101158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Ajmap1));
1102158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bjmap1));
1103158ec288SJunchao Zhang 
1104158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aimap2));
1105158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bimap2));
1106158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aperm2));
1107158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bperm2));
1108158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Ajmap2));
1109158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bjmap2));
1110158ec288SJunchao Zhang 
11119566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
11129566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->Cperm1));
1113cbc6b225SStefano Zampini   PetscFunctionReturn(0);
1114cbc6b225SStefano Zampini }
1115cbc6b225SStefano Zampini 
1116dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
11171eb62cbbSBarry Smith {
111844a69424SLois Curfman McInnes   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
111983e2fdc7SBarry Smith 
11203a40ed3dSBarry Smith   PetscFunctionBegin;
1121aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1122c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1123a5a9c739SBarry Smith #endif
11249566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
11259566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->diag));
11269566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&aij->A));
11279566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&aij->B));
1128aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
11299566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&aij->colmap));
1130b1fc9764SSatish Balay #else
11319566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->colmap));
1132b1fc9764SSatish Balay #endif
11339566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->garray));
11349566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->lvec));
11359566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&aij->Mvctx));
11369566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
11379566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->ld));
1138394ed5ebSJunchao Zhang 
1139cbc6b225SStefano Zampini   /* Free COO */
11409566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1141394ed5ebSJunchao Zhang 
11429566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1143901853e0SKris Buschelman 
11446718818eSStefano Zampini   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
11459566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
11466718818eSStefano Zampini 
11479566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
11489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
11499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
11509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
11519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
11529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
11539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
11549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
11559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
11569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
11573d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA)
11589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
11593d0639e7SStefano Zampini #endif
11603d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
11619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
11623d0639e7SStefano Zampini #endif
11639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
11645d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
11659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
11665d7652ecSHong Zhang #endif
1167d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
11689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1169d24d4204SJose E. Roman #endif
117063c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE)
11719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
11729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
117363c07aadSStefano Zampini #endif
11749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
11759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
11769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
11779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
11789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
11799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
11803d0639e7SStefano Zampini #if defined(PETSC_HAVE_MKL_SPARSE)
11819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
11823d0639e7SStefano Zampini #endif
11839566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
11849566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
11859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
11869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
11879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
11883a40ed3dSBarry Smith   PetscFunctionReturn(0);
11891eb62cbbSBarry Smith }
1190ee50ffe9SBarry Smith 
1191dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
11928e2fed03SBarry Smith {
11938e2fed03SBarry Smith   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
11948e2fed03SBarry Smith   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
11958e2fed03SBarry Smith   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
11963ea6fe3dSLisandro Dalcin   const PetscInt    *garray = aij->garray;
11972e5835c6SStefano Zampini   const PetscScalar *aa,*ba;
11983ea6fe3dSLisandro Dalcin   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
11993ea6fe3dSLisandro Dalcin   PetscInt          *rowlens;
12003ea6fe3dSLisandro Dalcin   PetscInt          *colidxs;
12013ea6fe3dSLisandro Dalcin   PetscScalar       *matvals;
12028e2fed03SBarry Smith 
12038e2fed03SBarry Smith   PetscFunctionBegin;
12049566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
12053ea6fe3dSLisandro Dalcin 
12063ea6fe3dSLisandro Dalcin   M  = mat->rmap->N;
12073ea6fe3dSLisandro Dalcin   N  = mat->cmap->N;
12083ea6fe3dSLisandro Dalcin   m  = mat->rmap->n;
12093ea6fe3dSLisandro Dalcin   rs = mat->rmap->rstart;
12103ea6fe3dSLisandro Dalcin   cs = mat->cmap->rstart;
12118e2fed03SBarry Smith   nz = A->nz + B->nz;
12123ea6fe3dSLisandro Dalcin 
12133ea6fe3dSLisandro Dalcin   /* write matrix header */
12140700a824SBarry Smith   header[0] = MAT_FILE_CLASSID;
12153ea6fe3dSLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
12169566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
12179566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
12188e2fed03SBarry Smith 
12193ea6fe3dSLisandro Dalcin   /* fill in and store row lengths  */
12209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&rowlens));
12213ea6fe3dSLisandro Dalcin   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
12229566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
12239566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
12248e2fed03SBarry Smith 
12253ea6fe3dSLisandro Dalcin   /* fill in and store column indices */
12269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz,&colidxs));
12273ea6fe3dSLisandro Dalcin   for (cnt=0, i=0; i<m; i++) {
12283ea6fe3dSLisandro Dalcin     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
12293ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12303ea6fe3dSLisandro Dalcin       colidxs[cnt++] = garray[B->j[jb]];
12318e2fed03SBarry Smith     }
12323ea6fe3dSLisandro Dalcin     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
12333ea6fe3dSLisandro Dalcin       colidxs[cnt++] = A->j[ja] + cs;
12343ea6fe3dSLisandro Dalcin     for (; jb<B->i[i+1]; jb++)
12353ea6fe3dSLisandro Dalcin       colidxs[cnt++] = garray[B->j[jb]];
12368e2fed03SBarry Smith   }
123708401ef6SPierre Jolivet   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
12389566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
12399566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
12408e2fed03SBarry Smith 
12413ea6fe3dSLisandro Dalcin   /* fill in and store nonzero values */
12429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
12439566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
12449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz,&matvals));
12453ea6fe3dSLisandro Dalcin   for (cnt=0, i=0; i<m; i++) {
12463ea6fe3dSLisandro Dalcin     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
12473ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12482e5835c6SStefano Zampini       matvals[cnt++] = ba[jb];
12498e2fed03SBarry Smith     }
12503ea6fe3dSLisandro Dalcin     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
12512e5835c6SStefano Zampini       matvals[cnt++] = aa[ja];
12523ea6fe3dSLisandro Dalcin     for (; jb<B->i[i+1]; jb++)
12532e5835c6SStefano Zampini       matvals[cnt++] = ba[jb];
12548e2fed03SBarry Smith   }
12559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
12569566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
125708401ef6SPierre Jolivet   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
12589566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
12599566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
12608e2fed03SBarry Smith 
12613ea6fe3dSLisandro Dalcin   /* write block size option to the viewer's .info file */
12629566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
12638e2fed03SBarry Smith   PetscFunctionReturn(0);
12648e2fed03SBarry Smith }
12658e2fed03SBarry Smith 
12669804daf3SBarry Smith #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Views an MPIAIJ matrix on a viewer whose type is
  tested against PETSCVIEWERDRAW, PETSCVIEWERASCII and PETSCVIEWERBINARY.

  ASCII viewers: the formats PETSC_VIEWER_LOAD_BALANCE, PETSC_VIEWER_ASCII_INFO_DETAIL,
  PETSC_VIEWER_ASCII_INFO and PETSC_VIEWER_ASCII_FACTOR_INFO are handled here and return
  early; any other ASCII format falls through to the generic path at the bottom.
  Binary viewers: a single-rank matrix is viewed through its block aij->A, otherwise
  MatView_MPIAIJ_Binary() is used.
  Draw viewers: nothing is done if the draw object is null.

  Generic path: a submatrix holding all rows and columns on rank 0 (and nothing on the
  other ranks) is created, and rank 0 views its sequential part through a sub-viewer.

  NOTE(review): the branch `else if (iascii && size == 1)` follows an `if (iascii)` and
  therefore can never be taken as the chain is written; confirm whether single-rank
  ASCII output was meant to go through aij->A directly.
*/
1267dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1268416022c9SBarry Smith {
126944a69424SLois Curfman McInnes   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
127032dcc486SBarry Smith   PetscMPIInt       rank = aij->rank,size = aij->size;
1271ace3abfcSBarry Smith   PetscBool         isdraw,iascii,isbinary;
1272b0a32e0cSBarry Smith   PetscViewer       sviewer;
1273f3ef73ceSBarry Smith   PetscViewerFormat format;
1274416022c9SBarry Smith 
12753a40ed3dSBarry Smith   PetscFunctionBegin;
12769566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
12779566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
12789566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
127932077d6dSBarry Smith   if (iascii) {
12809566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer,&format));
1281ef5fdb51SBarry Smith     if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather the local nonzero count (A block + B block) of every rank and print min/avg/max */
1282ef5fdb51SBarry Smith       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
12839566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(size,&nz));
12849566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1285ef5fdb51SBarry Smith       for (i=0; i<(PetscInt)size; i++) {
1286ef5fdb51SBarry Smith         nmax = PetscMax(nmax,nz[i]);
1287ef5fdb51SBarry Smith         nmin = PetscMin(nmin,nz[i]);
1288ef5fdb51SBarry Smith         navg += nz[i];
1289ef5fdb51SBarry Smith       }
12909566063dSJacob Faibussowitsch       PetscCall(PetscFree(nz));
      /* integer division: the printed average is truncated */
1291ef5fdb51SBarry Smith       navg = navg/size;
12929566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1293ef5fdb51SBarry Smith       PetscFunctionReturn(0);
1294ef5fdb51SBarry Smith     }
    /* NOTE(review): repeats the PetscViewerGetFormat() call made at the top of this branch */
12959566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer,&format));
1296456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
12974e220ebcSLois Curfman McInnes       MatInfo   info;
12986335e310SSatish Balay       PetscInt *inodes=NULL;
1299923f20ffSKris Buschelman 
13009566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
13019566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
13029566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      /* per-rank lines are printed in synchronized mode between the Push and Pop calls */
13039566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1304923f20ffSKris Buschelman       if (!inodes) {
13059566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
13065f80ce2aSJacob Faibussowitsch                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
13076831982aSBarry Smith       } else {
13089566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
13095f80ce2aSJacob Faibussowitsch                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
13106831982aSBarry Smith       }
13119566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
13129566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
13139566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
13149566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
13159566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
13169566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
13179566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
13189566063dSJacob Faibussowitsch       PetscCall(VecScatterView(aij->Mvctx,viewer));
13193a40ed3dSBarry Smith       PetscFunctionReturn(0);
1320fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1321923f20ffSKris Buschelman       PetscInt inodecount,inodelimit,*inodes;
13229566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1323923f20ffSKris Buschelman       if (inodes) {
13249566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1325d38fa0fbSBarry Smith       } else {
13269566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1327d38fa0fbSBarry Smith       }
13283a40ed3dSBarry Smith       PetscFunctionReturn(0);
13294aedb280SBarry Smith     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing is printed for this format */
13304aedb280SBarry Smith       PetscFunctionReturn(0);
133108480c60SBarry Smith     }
    /* any other ASCII format leaves this if/else chain and continues with the generic path below */
13328e2fed03SBarry Smith   } else if (isbinary) {
13338e2fed03SBarry Smith     if (size == 1) {
      /* single rank: view the block aij->A directly, under the name of the parallel matrix */
13349566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
13359566063dSJacob Faibussowitsch       PetscCall(MatView(aij->A,viewer));
13368e2fed03SBarry Smith     } else {
13379566063dSJacob Faibussowitsch       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
13388e2fed03SBarry Smith     }
13398e2fed03SBarry Smith     PetscFunctionReturn(0);
  /* NOTE(review): not reachable as written, since iascii is already handled by the first branch */
134071e56450SStefano Zampini   } else if (iascii && size == 1) {
13419566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
13429566063dSJacob Faibussowitsch     PetscCall(MatView(aij->A,viewer));
134371e56450SStefano Zampini     PetscFunctionReturn(0);
13440f5bd95cSBarry Smith   } else if (isdraw) {
1345b0a32e0cSBarry Smith     PetscDraw draw;
1346ace3abfcSBarry Smith     PetscBool isnull;
13479566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
13489566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw,&isnull));
1349383922c3SLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
135019bcc07fSBarry Smith   }
135119bcc07fSBarry Smith 
135271e56450SStefano Zampini   { /* assemble the entire matrix onto first processor */
135371e56450SStefano Zampini     Mat A = NULL, Av;
135471e56450SStefano Zampini     IS  isrow,iscol;
13552ee70a88SLois Curfman McInnes 
    /* rank 0 requests every global row and column, all other ranks request none */
13569566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
13579566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
13589566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    /* Av is obtained from A and is not destroyed separately; only A is destroyed at the end */
13599566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
136071e56450SStefano Zampini /*  The commented code uses MatCreateSubMatrices instead */
136171e56450SStefano Zampini /*
136271e56450SStefano Zampini     Mat *AA, A = NULL, Av;
136371e56450SStefano Zampini     IS  isrow,iscol;
136471e56450SStefano Zampini 
13659566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
13669566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
13679566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1368dd400576SPatrick Sanan     if (rank == 0) {
13699566063dSJacob Faibussowitsch        PetscCall(PetscObjectReference((PetscObject)AA[0]));
137071e56450SStefano Zampini        A    = AA[0];
137171e56450SStefano Zampini        Av   = AA[0];
137295373324SBarry Smith     }
13739566063dSJacob Faibussowitsch     PetscCall(MatDestroySubMatrices(1,&AA));
137471e56450SStefano Zampini */
13759566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol));
13769566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow));
137755843e3eSBarry Smith     /*
137855843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1379b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
138055843e3eSBarry Smith     */
13819566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1382dd400576SPatrick Sanan     if (rank == 0) {
      /* carry the name of the parallel matrix over to the sequential matrix that is viewed */
138371e56450SStefano Zampini       if (((PetscObject)mat)->name) {
13849566063dSJacob Faibussowitsch         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
138571e56450SStefano Zampini       }
13869566063dSJacob Faibussowitsch       PetscCall(MatView_SeqAIJ(Av,sviewer));
138795373324SBarry Smith     }
13889566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
13899566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
13909566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
139195373324SBarry Smith   }
13923a40ed3dSBarry Smith   PetscFunctionReturn(0);
13931eb62cbbSBarry Smith }
13941eb62cbbSBarry Smith 
1395dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1396416022c9SBarry Smith {
1397ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
1398416022c9SBarry Smith 
13993a40ed3dSBarry Smith   PetscFunctionBegin;
14009566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
14019566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
14029566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
14039566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
140432077d6dSBarry Smith   if (iascii || isdraw || isbinary || issocket) {
14059566063dSJacob Faibussowitsch     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1406416022c9SBarry Smith   }
14073a40ed3dSBarry Smith   PetscFunctionReturn(0);
1408416022c9SBarry Smith }
1409416022c9SBarry Smith 
141041f059aeSBarry Smith PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
14118a729477SBarry Smith {
141244a69424SLois Curfman McInnes   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413f4259b30SLisandro Dalcin   Vec            bb1 = NULL;
1414ace3abfcSBarry Smith   PetscBool      hasop;
14158a729477SBarry Smith 
14163a40ed3dSBarry Smith   PetscFunctionBegin;
1417a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
14189566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1419a2b30743SBarry Smith     PetscFunctionReturn(0);
1420a2b30743SBarry Smith   }
1421a2b30743SBarry Smith 
14224e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
14239566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb,&bb1));
14244e980039SJed Brown   }
14254e980039SJed Brown 
1426c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1427da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14289566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
14292798e883SHong Zhang       its--;
1430da3a660dSBarry Smith     }
14312798e883SHong Zhang 
14322798e883SHong Zhang     while (its--) {
14339566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14349566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14352798e883SHong Zhang 
1436c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14379566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec,-1.0));
14389566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
14392798e883SHong Zhang 
1440c14dc6b6SHong Zhang       /* local sweep */
14419566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
14422798e883SHong Zhang     }
14433a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1444da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14459566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
14462798e883SHong Zhang       its--;
1447da3a660dSBarry Smith     }
14482798e883SHong Zhang     while (its--) {
14499566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14509566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14512798e883SHong Zhang 
1452c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14539566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec,-1.0));
14549566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1455c14dc6b6SHong Zhang 
1456c14dc6b6SHong Zhang       /* local sweep */
14579566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
14582798e883SHong Zhang     }
14593a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1460da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14619566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
14622798e883SHong Zhang       its--;
1463da3a660dSBarry Smith     }
14642798e883SHong Zhang     while (its--) {
14659566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14669566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14672798e883SHong Zhang 
1468c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14699566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec,-1.0));
14709566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
14712798e883SHong Zhang 
1472c14dc6b6SHong Zhang       /* local sweep */
14739566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
14742798e883SHong Zhang     }
1475a7420bb7SBarry Smith   } else if (flag & SOR_EISENSTAT) {
1476a7420bb7SBarry Smith     Vec xx1;
1477a7420bb7SBarry Smith 
14789566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb,&xx1));
14799566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1480a7420bb7SBarry Smith 
14819566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
14829566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1483a7420bb7SBarry Smith     if (!mat->diag) {
14849566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
14859566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin,mat->diag));
1486a7420bb7SBarry Smith     }
14879566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1488bd0c2dcbSBarry Smith     if (hasop) {
14899566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1490bd0c2dcbSBarry Smith     } else {
14919566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1492bd0c2dcbSBarry Smith     }
14939566063dSJacob Faibussowitsch     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1494887ee2caSBarry Smith 
14959566063dSJacob Faibussowitsch     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1496a7420bb7SBarry Smith 
1497a7420bb7SBarry Smith     /* local sweep */
14989566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
14999566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx,1.0,xx1));
15009566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&xx1));
1501ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1502c14dc6b6SHong Zhang 
15039566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&bb1));
1504a0808db4SHong Zhang 
15057b6c816cSBarry Smith   matin->factorerrortype = mat->A->factorerrortype;
15063a40ed3dSBarry Smith   PetscFunctionReturn(0);
15078a729477SBarry Smith }
1508a66be287SLois Curfman McInnes 
150942e855d1Svictor PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
151042e855d1Svictor {
151172e6a0cfSJed Brown   Mat            aA,aB,Aperm;
151272e6a0cfSJed Brown   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
151372e6a0cfSJed Brown   PetscScalar    *aa,*ba;
151472e6a0cfSJed Brown   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
151572e6a0cfSJed Brown   PetscSF        rowsf,sf;
15160298fd71SBarry Smith   IS             parcolp = NULL;
151772e6a0cfSJed Brown   PetscBool      done;
151842e855d1Svictor 
151942e855d1Svictor   PetscFunctionBegin;
15209566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A,&m,&n));
15219566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rowp,&rwant));
15229566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(colp,&cwant));
15239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
152472e6a0cfSJed Brown 
152572e6a0cfSJed Brown   /* Invert row permutation to find out where my rows should go */
15269566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
15279566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
15289566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(rowsf));
152972e6a0cfSJed Brown   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
15309566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
15319566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
153272e6a0cfSJed Brown 
153372e6a0cfSJed Brown   /* Invert column permutation to find out where my columns should go */
15349566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
15359566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
15369566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
153772e6a0cfSJed Brown   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
15389566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
15399566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
15409566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
154172e6a0cfSJed Brown 
15429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rowp,&rwant));
15439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(colp,&cwant));
15449566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
154572e6a0cfSJed Brown 
154672e6a0cfSJed Brown   /* Find out where my gcols should go */
15479566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aB,NULL,&ng));
15489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ng,&gcdest));
15499566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
15509566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
15519566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
15529566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
15539566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
15549566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
155572e6a0cfSJed Brown 
15569566063dSJacob Faibussowitsch   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
15579566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
15589566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
155972e6a0cfSJed Brown   for (i=0; i<m; i++) {
1560131c27b5Sprj-     PetscInt    row = rdest[i];
1561131c27b5Sprj-     PetscMPIInt rowner;
15629566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
156372e6a0cfSJed Brown     for (j=ai[i]; j<ai[i+1]; j++) {
1564131c27b5Sprj-       PetscInt    col = cdest[aj[j]];
1565131c27b5Sprj-       PetscMPIInt cowner;
15669566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
156772e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
156872e6a0cfSJed Brown       else onnz[i]++;
156972e6a0cfSJed Brown     }
157072e6a0cfSJed Brown     for (j=bi[i]; j<bi[i+1]; j++) {
1571131c27b5Sprj-       PetscInt    col = gcdest[bj[j]];
1572131c27b5Sprj-       PetscMPIInt cowner;
15739566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
157472e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
157572e6a0cfSJed Brown       else onnz[i]++;
157672e6a0cfSJed Brown     }
157772e6a0cfSJed Brown   }
15789566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
15799566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
15809566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
15819566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
15829566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&rowsf));
158372e6a0cfSJed Brown 
15849566063dSJacob Faibussowitsch   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
15859566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aA,&aa));
15869566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aB,&ba));
158772e6a0cfSJed Brown   for (i=0; i<m; i++) {
158872e6a0cfSJed Brown     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1589970468b0SJed Brown     PetscInt j0,rowlen;
159072e6a0cfSJed Brown     rowlen = ai[i+1] - ai[i];
1591970468b0SJed Brown     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1592970468b0SJed Brown       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
15939566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1594970468b0SJed Brown     }
159572e6a0cfSJed Brown     rowlen = bi[i+1] - bi[i];
1596970468b0SJed Brown     for (j0=j=0; j<rowlen; j0=j) {
1597970468b0SJed Brown       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
15989566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1599970468b0SJed Brown     }
160072e6a0cfSJed Brown   }
16019566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
16029566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
16039566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
16049566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
16059566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
16069566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
16079566063dSJacob Faibussowitsch   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
16089566063dSJacob Faibussowitsch   PetscCall(PetscFree3(work,rdest,cdest));
16099566063dSJacob Faibussowitsch   PetscCall(PetscFree(gcdest));
16109566063dSJacob Faibussowitsch   if (parcolp) PetscCall(ISDestroy(&colp));
161172e6a0cfSJed Brown   *B = Aperm;
161242e855d1Svictor   PetscFunctionReturn(0);
161342e855d1Svictor }
161442e855d1Svictor 
1615c5e4d11fSDmitry Karpeev PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1616c5e4d11fSDmitry Karpeev {
1617c5e4d11fSDmitry Karpeev   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1618c5e4d11fSDmitry Karpeev 
1619c5e4d11fSDmitry Karpeev   PetscFunctionBegin;
16209566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1621c5e4d11fSDmitry Karpeev   if (ghosts) *ghosts = aij->garray;
1622c5e4d11fSDmitry Karpeev   PetscFunctionReturn(0);
1623c5e4d11fSDmitry Karpeev }
1624c5e4d11fSDmitry Karpeev 
1625dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1626a66be287SLois Curfman McInnes {
1627a66be287SLois Curfman McInnes   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1628a66be287SLois Curfman McInnes   Mat            A    = mat->A,B = mat->B;
16293966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
1630a66be287SLois Curfman McInnes 
16313a40ed3dSBarry Smith   PetscFunctionBegin;
16324e220ebcSLois Curfman McInnes   info->block_size = 1.0;
16339566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
16342205254eSKarl Rupp 
16354e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
16364e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
16372205254eSKarl Rupp 
16389566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
16392205254eSKarl Rupp 
16404e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
16414e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
1642a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
16434e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
16444e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
16454e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
16464e220ebcSLois Curfman McInnes     info->memory       = isend[3];
16474e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1648a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
16491c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
16502205254eSKarl Rupp 
16514e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16524e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16534e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16544e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16554e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1656a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
16571c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
16582205254eSKarl Rupp 
16594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1664a66be287SLois Curfman McInnes   }
16654e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16664e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16674e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1669a66be287SLois Curfman McInnes }
1670a66be287SLois Curfman McInnes 
1671ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1672c74985f6SBarry Smith {
1673c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1674c74985f6SBarry Smith 
16753a40ed3dSBarry Smith   PetscFunctionBegin;
167612c028f9SKris Buschelman   switch (op) {
1677512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
167812c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
167928b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1680a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
168112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
16820ad02fcaSStefano Zampini   case MAT_USE_INODES:
168312c028f9SKris Buschelman   case MAT_IGNORE_ZERO_ENTRIES:
16841a2c6b5cSJunchao Zhang   case MAT_FORM_EXPLICIT_TRANSPOSE:
1685fa1f0d2cSMatthew G Knepley     MatCheckPreallocated(A,1);
16869566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A,op,flg));
16879566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B,op,flg));
168812c028f9SKris Buschelman     break;
168912c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
169043674050SBarry Smith     MatCheckPreallocated(A,1);
16914e0d8c25SBarry Smith     a->roworiented = flg;
16922205254eSKarl Rupp 
16939566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A,op,flg));
16949566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B,op,flg));
169512c028f9SKris Buschelman     break;
16968c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1697071fcb05SBarry Smith   case MAT_SORTED_FULL:
16989566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
169912c028f9SKris Buschelman     break;
170012c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
17015c0f0b64SBarry Smith     a->donotstash = flg;
170212c028f9SKris Buschelman     break;
1703c8ca1fbcSVaclav Hapla   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1704ffa07934SHong Zhang   case MAT_SPD:
170577e54ba9SKris Buschelman   case MAT_SYMMETRIC:
170677e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
1707bf108f30SBarry Smith   case MAT_HERMITIAN:
1708bf108f30SBarry Smith   case MAT_SYMMETRY_ETERNAL:
170977e54ba9SKris Buschelman     break;
1710c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1711c10200c1SHong Zhang     A->submat_singleis = flg;
1712c10200c1SHong Zhang     break;
1713957cac9fSHong Zhang   case MAT_STRUCTURE_ONLY:
1714957cac9fSHong Zhang     /* The option is handled directly by MatSetOption() */
1715957cac9fSHong Zhang     break;
171612c028f9SKris Buschelman   default:
171798921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
17183a40ed3dSBarry Smith   }
17193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1720c74985f6SBarry Smith }
1721c74985f6SBarry Smith 
1722b1d57f15SBarry Smith PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
172339e00950SLois Curfman McInnes {
1724154123eaSLois Curfman McInnes   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
172587828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1726d0f46423SBarry Smith   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1727d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1728b1d57f15SBarry Smith   PetscInt       *cmap,*idx_p;
172939e00950SLois Curfman McInnes 
17303a40ed3dSBarry Smith   PetscFunctionBegin;
173128b400f6SJacob Faibussowitsch   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
17327a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
17337a0afa10SBarry Smith 
173470f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
17357a0afa10SBarry Smith     /*
17367a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
17377a0afa10SBarry Smith     */
17387a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1739b1d57f15SBarry Smith     PetscInt   max = 1,tmp;
1740d0f46423SBarry Smith     for (i=0; i<matin->rmap->n; i++) {
17417a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
17422205254eSKarl Rupp       if (max < tmp) max = tmp;
17437a0afa10SBarry Smith     }
17449566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
17457a0afa10SBarry Smith   }
17467a0afa10SBarry Smith 
1747*aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1748abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
174939e00950SLois Curfman McInnes 
1750154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1751f4259b30SLisandro Dalcin   if (!v)   {pvA = NULL; pvB = NULL;}
1752f4259b30SLisandro Dalcin   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
17539566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
17549566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1755154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1756154123eaSLois Curfman McInnes 
175770f0671dSBarry Smith   cmap = mat->garray;
1758154123eaSLois Curfman McInnes   if (v  || idx) {
1759154123eaSLois Curfman McInnes     if (nztot) {
1760154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1761b1d57f15SBarry Smith       PetscInt imark = -1;
1762154123eaSLois Curfman McInnes       if (v) {
176370f0671dSBarry Smith         *v = v_p = mat->rowvalues;
176439e00950SLois Curfman McInnes         for (i=0; i<nzB; i++) {
176570f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766154123eaSLois Curfman McInnes           else break;
1767154123eaSLois Curfman McInnes         }
1768154123eaSLois Curfman McInnes         imark = i;
176970f0671dSBarry Smith         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
177070f0671dSBarry Smith         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1771154123eaSLois Curfman McInnes       }
1772154123eaSLois Curfman McInnes       if (idx) {
177370f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
177470f0671dSBarry Smith         if (imark > -1) {
177570f0671dSBarry Smith           for (i=0; i<imark; i++) {
177670f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
177770f0671dSBarry Smith           }
177870f0671dSBarry Smith         } else {
1779154123eaSLois Curfman McInnes           for (i=0; i<nzB; i++) {
178070f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1781154123eaSLois Curfman McInnes             else break;
1782154123eaSLois Curfman McInnes           }
1783154123eaSLois Curfman McInnes           imark = i;
178470f0671dSBarry Smith         }
178570f0671dSBarry Smith         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
178670f0671dSBarry Smith         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
178739e00950SLois Curfman McInnes       }
17883f97c4b0SBarry Smith     } else {
1789f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1790f4259b30SLisandro Dalcin       if (v)   *v   = NULL;
17911ca473b0SSatish Balay     }
1792154123eaSLois Curfman McInnes   }
179339e00950SLois Curfman McInnes   *nz  = nztot;
17949566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
17959566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
17963a40ed3dSBarry Smith   PetscFunctionReturn(0);
179739e00950SLois Curfman McInnes }
179839e00950SLois Curfman McInnes 
1799b1d57f15SBarry Smith PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
180039e00950SLois Curfman McInnes {
18017a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18023a40ed3dSBarry Smith 
18033a40ed3dSBarry Smith   PetscFunctionBegin;
180428b400f6SJacob Faibussowitsch   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
18057a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
18063a40ed3dSBarry Smith   PetscFunctionReturn(0);
180739e00950SLois Curfman McInnes }
180839e00950SLois Curfman McInnes 
1809dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1810855ac2c5SLois Curfman McInnes {
1811855ac2c5SLois Curfman McInnes   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1812ec8511deSBarry Smith   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1813d0f46423SBarry Smith   PetscInt        i,j,cstart = mat->cmap->rstart;
1814329f5518SBarry Smith   PetscReal       sum = 0.0;
1815fff043a9SJunchao Zhang   const MatScalar *v,*amata,*bmata;
181604ca555eSLois Curfman McInnes 
18173a40ed3dSBarry Smith   PetscFunctionBegin;
181817699dbbSLois Curfman McInnes   if (aij->size == 1) {
18199566063dSJacob Faibussowitsch     PetscCall(MatNorm(aij->A,type,norm));
182037fa93a5SLois Curfman McInnes   } else {
18219566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
18229566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
182304ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1824fff043a9SJunchao Zhang       v = amata;
182504ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++) {
1826329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
182704ca555eSLois Curfman McInnes       }
1828fff043a9SJunchao Zhang       v = bmata;
182904ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++) {
1830329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
183104ca555eSLois Curfman McInnes       }
18321c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
18338f1a2a5eSBarry Smith       *norm = PetscSqrtReal(*norm);
18349566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
18353a40ed3dSBarry Smith     } else if (type == NORM_1) { /* max column norm */
1836329f5518SBarry Smith       PetscReal *tmp,*tmp2;
1837b1d57f15SBarry Smith       PetscInt  *jj,*garray = aij->garray;
18389566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
18399566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
184004ca555eSLois Curfman McInnes       *norm = 0.0;
1841fff043a9SJunchao Zhang       v     = amata; jj = amat->j;
184204ca555eSLois Curfman McInnes       for (j=0; j<amat->nz; j++) {
1843bfec09a0SHong Zhang         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
184404ca555eSLois Curfman McInnes       }
1845fff043a9SJunchao Zhang       v = bmata; jj = bmat->j;
184604ca555eSLois Curfman McInnes       for (j=0; j<bmat->nz; j++) {
1847bfec09a0SHong Zhang         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
184804ca555eSLois Curfman McInnes       }
18491c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1850d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
185104ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
185204ca555eSLois Curfman McInnes       }
18539566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp));
18549566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp2));
18559566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
18563a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1857329f5518SBarry Smith       PetscReal ntemp = 0.0;
1858d0f46423SBarry Smith       for (j=0; j<aij->A->rmap->n; j++) {
1859fff043a9SJunchao Zhang         v   = amata + amat->i[j];
186004ca555eSLois Curfman McInnes         sum = 0.0;
186104ca555eSLois Curfman McInnes         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1862cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
186304ca555eSLois Curfman McInnes         }
1864fff043a9SJunchao Zhang         v = bmata + bmat->i[j];
186504ca555eSLois Curfman McInnes         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1866cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
186704ca555eSLois Curfman McInnes         }
1868515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
186904ca555eSLois Curfman McInnes       }
18701c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
18719566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1872ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
18739566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
18749566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
187537fa93a5SLois Curfman McInnes   }
18763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1877855ac2c5SLois Curfman McInnes }
1878855ac2c5SLois Curfman McInnes 
1879fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1880b7c46309SBarry Smith {
1881a8661f62Sandi selinger   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1882a8661f62Sandi selinger   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1883071fcb05SBarry Smith   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1884071fcb05SBarry Smith   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1885a8661f62Sandi selinger   Mat             B,A_diag,*B_diag;
1886ce496241SStefano Zampini   const MatScalar *pbv,*bv;
1887b7c46309SBarry Smith 
18883a40ed3dSBarry Smith   PetscFunctionBegin;
188980bcc5a1SJed Brown   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1890da668accSHong Zhang   ai = Aloc->i; aj = Aloc->j;
1891da668accSHong Zhang   bi = Bloc->i; bj = Bloc->j;
1892fc73b1b3SBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
189380bcc5a1SJed Brown     PetscInt             *d_nnz,*g_nnz,*o_nnz;
189480bcc5a1SJed Brown     PetscSFNode          *oloc;
1895713c93b4SJed Brown     PETSC_UNUSED PetscSF sf;
189680bcc5a1SJed Brown 
18979566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
189880bcc5a1SJed Brown     /* compute d_nnz for preallocation */
18999566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(d_nnz,na));
1900cbc6b225SStefano Zampini     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
190180bcc5a1SJed Brown     /* compute local off-diagonal contributions */
19029566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(g_nnz,nb));
190380bcc5a1SJed Brown     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
190480bcc5a1SJed Brown     /* map those to global */
19059566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
19069566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
19079566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(sf));
19089566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(o_nnz,na));
19099566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
19109566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
19119566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&sf));
1912d4bb536fSBarry Smith 
19139566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
19149566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
19159566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
19169566063dSJacob Faibussowitsch     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
19179566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
19189566063dSJacob Faibussowitsch     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1919fc4dec0aSBarry Smith   } else {
1920fc4dec0aSBarry Smith     B    = *matout;
19219566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1922fc4dec0aSBarry Smith   }
1923b7c46309SBarry Smith 
1924f79cb1a0Sandi selinger   b           = (Mat_MPIAIJ*)B->data;
1925a8661f62Sandi selinger   A_diag      = a->A;
1926a8661f62Sandi selinger   B_diag      = &b->A;
1927a8661f62Sandi selinger   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1928a8661f62Sandi selinger   A_diag_ncol = A_diag->cmap->N;
1929a8661f62Sandi selinger   B_diag_ilen = sub_B_diag->ilen;
1930a8661f62Sandi selinger   B_diag_i    = sub_B_diag->i;
1931f79cb1a0Sandi selinger 
1932f79cb1a0Sandi selinger   /* Set ilen for diagonal of B */
1933a8661f62Sandi selinger   for (i=0; i<A_diag_ncol; i++) {
1934a8661f62Sandi selinger     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1935b7c46309SBarry Smith   }
1936f79cb1a0Sandi selinger 
1937a8661f62Sandi selinger   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1938a8661f62Sandi selinger   very quickly (=without using MatSetValues), because all writes are local. */
19399566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1940f79cb1a0Sandi selinger 
1941b7c46309SBarry Smith   /* copy over the B part */
19429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[mb],&cols));
19439566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1944ce496241SStefano Zampini   pbv  = bv;
1945d0f46423SBarry Smith   row  = A->rmap->rstart;
19462205254eSKarl Rupp   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
194761a2fbbaSHong Zhang   cols_tmp = cols;
1948da668accSHong Zhang   for (i=0; i<mb; i++) {
1949da668accSHong Zhang     ncol = bi[i+1]-bi[i];
19509566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
19512205254eSKarl Rupp     row++;
1952ce496241SStefano Zampini     pbv += ncol; cols_tmp += ncol;
1953b7c46309SBarry Smith   }
19549566063dSJacob Faibussowitsch   PetscCall(PetscFree(cols));
19559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1956fc73b1b3SBarry Smith 
19579566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
19589566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1959cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
19600de55854SLois Curfman McInnes     *matout = B;
19610de55854SLois Curfman McInnes   } else {
19629566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A,&B));
19630de55854SLois Curfman McInnes   }
19643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1965b7c46309SBarry Smith }
1966b7c46309SBarry Smith 
1967dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1968a008b906SSatish Balay {
19694b967eb1SSatish Balay   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
19704b967eb1SSatish Balay   Mat            a    = aij->A,b = aij->B;
1971b1d57f15SBarry Smith   PetscInt       s1,s2,s3;
1972a008b906SSatish Balay 
19733a40ed3dSBarry Smith   PetscFunctionBegin;
19749566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat,&s2,&s3));
19754b967eb1SSatish Balay   if (rr) {
19769566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr,&s1));
197708401ef6SPierre Jolivet     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
19784b967eb1SSatish Balay     /* Overlap communication with computation. */
19799566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1980a008b906SSatish Balay   }
19814b967eb1SSatish Balay   if (ll) {
19829566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll,&s1));
198308401ef6SPierre Jolivet     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
19849566063dSJacob Faibussowitsch     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
19854b967eb1SSatish Balay   }
19864b967eb1SSatish Balay   /* scale  the diagonal block */
19879566063dSJacob Faibussowitsch   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
19884b967eb1SSatish Balay 
19894b967eb1SSatish Balay   if (rr) {
19904b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
19919566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
19929566063dSJacob Faibussowitsch     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
19934b967eb1SSatish Balay   }
19943a40ed3dSBarry Smith   PetscFunctionReturn(0);
1995a008b906SSatish Balay }
1996a008b906SSatish Balay 
1997dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1998bb5a7306SBarry Smith {
1999bb5a7306SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
20003a40ed3dSBarry Smith 
20013a40ed3dSBarry Smith   PetscFunctionBegin;
20029566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
20033a40ed3dSBarry Smith   PetscFunctionReturn(0);
2004bb5a7306SBarry Smith }
2005bb5a7306SBarry Smith 
2006ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2007d4bb536fSBarry Smith {
2008d4bb536fSBarry Smith   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2009d4bb536fSBarry Smith   Mat            a,b,c,d;
2010ace3abfcSBarry Smith   PetscBool      flg;
2011d4bb536fSBarry Smith 
20123a40ed3dSBarry Smith   PetscFunctionBegin;
2013d4bb536fSBarry Smith   a = matA->A; b = matA->B;
2014d4bb536fSBarry Smith   c = matB->A; d = matB->B;
2015d4bb536fSBarry Smith 
20169566063dSJacob Faibussowitsch   PetscCall(MatEqual(a,c,&flg));
2017abc0a331SBarry Smith   if (flg) {
20189566063dSJacob Faibussowitsch     PetscCall(MatEqual(b,d,&flg));
2019d4bb536fSBarry Smith   }
20201c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
20213a40ed3dSBarry Smith   PetscFunctionReturn(0);
2022d4bb536fSBarry Smith }
2023d4bb536fSBarry Smith 
2024dfbe8321SBarry Smith PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2025cb5b572fSBarry Smith {
2026cb5b572fSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2027cb5b572fSBarry Smith   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2028cb5b572fSBarry Smith 
2029cb5b572fSBarry Smith   PetscFunctionBegin;
203033f4a19fSKris Buschelman   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
203133f4a19fSKris Buschelman   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2032cb5b572fSBarry Smith     /* because of the column compression in the off-processor part of the matrix a->B,
2033cb5b572fSBarry Smith        the number of columns in a->B and b->B may be different, hence we cannot call
2034cb5b572fSBarry Smith        the MatCopy() directly on the two parts. If need be, we can provide a more
2035cb5b572fSBarry Smith        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2036cb5b572fSBarry Smith        then copying the submatrices */
20379566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A,B,str));
2038cb5b572fSBarry Smith   } else {
20399566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A,b->A,str));
20409566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B,b->B,str));
2041cb5b572fSBarry Smith   }
20429566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2043cb5b572fSBarry Smith   PetscFunctionReturn(0);
2044cb5b572fSBarry Smith }
2045cb5b572fSBarry Smith 
20464994cf47SJed Brown PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2047273d9f13SBarry Smith {
2048273d9f13SBarry Smith   PetscFunctionBegin;
20499566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2050273d9f13SBarry Smith   PetscFunctionReturn(0);
2051273d9f13SBarry Smith }
2052273d9f13SBarry Smith 
2053001ddc4fSHong Zhang /*
2054001ddc4fSHong Zhang    Computes the number of nonzeros per row needed for preallocation when X and Y
2055001ddc4fSHong Zhang    have different nonzero structure.
2056001ddc4fSHong Zhang */
2057001ddc4fSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
205895b7e79eSJed Brown {
2059001ddc4fSHong Zhang   PetscInt       i,j,k,nzx,nzy;
206095b7e79eSJed Brown 
206195b7e79eSJed Brown   PetscFunctionBegin;
206295b7e79eSJed Brown   /* Set the number of nonzeros in the new matrix */
206395b7e79eSJed Brown   for (i=0; i<m; i++) {
2064001ddc4fSHong Zhang     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2065001ddc4fSHong Zhang     nzx = xi[i+1] - xi[i];
2066001ddc4fSHong Zhang     nzy = yi[i+1] - yi[i];
206795b7e79eSJed Brown     nnz[i] = 0;
206895b7e79eSJed Brown     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2069001ddc4fSHong Zhang       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2070001ddc4fSHong Zhang       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
207195b7e79eSJed Brown       nnz[i]++;
207295b7e79eSJed Brown     }
207395b7e79eSJed Brown     for (; k<nzy; k++) nnz[i]++;
207495b7e79eSJed Brown   }
207595b7e79eSJed Brown   PetscFunctionReturn(0);
207695b7e79eSJed Brown }
207795b7e79eSJed Brown 
2078001ddc4fSHong Zhang /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2079001ddc4fSHong Zhang static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2080001ddc4fSHong Zhang {
2081001ddc4fSHong Zhang   PetscInt       m = Y->rmap->N;
2082001ddc4fSHong Zhang   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2083001ddc4fSHong Zhang   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2084001ddc4fSHong Zhang 
2085001ddc4fSHong Zhang   PetscFunctionBegin;
20869566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2087001ddc4fSHong Zhang   PetscFunctionReturn(0);
2088001ddc4fSHong Zhang }
2089001ddc4fSHong Zhang 
2090f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2091ac90fabeSBarry Smith {
2092ac90fabeSBarry Smith   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2093ac90fabeSBarry Smith 
2094ac90fabeSBarry Smith   PetscFunctionBegin;
2095ac90fabeSBarry Smith   if (str == SAME_NONZERO_PATTERN) {
20969566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->A,a,xx->A,str));
20979566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2098ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
20999566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y,a,X,str));
2100ac90fabeSBarry Smith   } else {
21019f5f6813SShri Abhyankar     Mat      B;
21029f5f6813SShri Abhyankar     PetscInt *nnz_d,*nnz_o;
2103d9d719b4SStefano Zampini 
21049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
21059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
21069566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
21079566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
21089566063dSJacob Faibussowitsch     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
21099566063dSJacob Faibussowitsch     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
21109566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
21119566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
21129566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
21139566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
21149566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y,&B));
21159566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
21169566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
2117ac90fabeSBarry Smith   }
2118ac90fabeSBarry Smith   PetscFunctionReturn(0);
2119ac90fabeSBarry Smith }
2120ac90fabeSBarry Smith 
21212726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2122354c94deSBarry Smith 
21237087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2124354c94deSBarry Smith {
21255f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
21265f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
2127354c94deSBarry Smith     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2128354c94deSBarry Smith 
21299566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->A));
21309566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->B));
21315f80ce2aSJacob Faibussowitsch   }
2132354c94deSBarry Smith   PetscFunctionReturn(0);
2133354c94deSBarry Smith }
2134354c94deSBarry Smith 
213599cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIAIJ(Mat A)
213699cafbc1SBarry Smith {
213799cafbc1SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
213899cafbc1SBarry Smith 
213999cafbc1SBarry Smith   PetscFunctionBegin;
21409566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
21419566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
214299cafbc1SBarry Smith   PetscFunctionReturn(0);
214399cafbc1SBarry Smith }
214499cafbc1SBarry Smith 
214599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
214699cafbc1SBarry Smith {
214799cafbc1SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
214899cafbc1SBarry Smith 
214999cafbc1SBarry Smith   PetscFunctionBegin;
21509566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
21519566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
215299cafbc1SBarry Smith   PetscFunctionReturn(0);
215399cafbc1SBarry Smith }
215499cafbc1SBarry Smith 
2155c91732d9SHong Zhang PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2156c91732d9SHong Zhang {
2157c91732d9SHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2158475b8b61SHong Zhang   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2159475b8b61SHong Zhang   PetscScalar       *va,*vv;
2160475b8b61SHong Zhang   Vec               vB,vA;
2161475b8b61SHong Zhang   const PetscScalar *vb;
2162c91732d9SHong Zhang 
2163c91732d9SHong Zhang   PetscFunctionBegin;
21649566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21659566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2166475b8b61SHong Zhang 
21679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(vA,&va));
2168c91732d9SHong Zhang   if (idx) {
2169475b8b61SHong Zhang     for (i=0; i<m; i++) {
2170d0f46423SBarry Smith       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2171c91732d9SHong Zhang     }
2172c91732d9SHong Zhang   }
2173c91732d9SHong Zhang 
21749566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
21759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&idxb));
21769566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2177c91732d9SHong Zhang 
21789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v,&vv));
21799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(vB,&vb));
2180475b8b61SHong Zhang   for (i=0; i<m; i++) {
2181c91732d9SHong Zhang     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2182475b8b61SHong Zhang       vv[i] = vb[i];
2183c91732d9SHong Zhang       if (idx) idx[i] = a->garray[idxb[i]];
2184475b8b61SHong Zhang     } else {
2185475b8b61SHong Zhang       vv[i] = va[i];
21864e879edeSHong Zhang       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2187475b8b61SHong Zhang         idx[i] = a->garray[idxb[i]];
2188c91732d9SHong Zhang     }
2189c91732d9SHong Zhang   }
21909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA,&vv));
21919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA,&va));
21929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(vB,&vb));
21939566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxb));
21949566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vA));
21959566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vB));
2196c91732d9SHong Zhang   PetscFunctionReturn(0);
2197c91732d9SHong Zhang }
2198c91732d9SHong Zhang 
2199c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2200c87e5d42SMatthew Knepley {
2201f07e67edSHong Zhang   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2202f07e67edSHong Zhang   PetscInt          m = A->rmap->n,n = A->cmap->n;
2203f07e67edSHong Zhang   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2204f07e67edSHong Zhang   PetscInt          *cmap  = mat->garray;
2205f07e67edSHong Zhang   PetscInt          *diagIdx, *offdiagIdx;
2206f07e67edSHong Zhang   Vec               diagV, offdiagV;
2207ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2208ce496241SStefano Zampini   const PetscScalar *ba,*bav;
2209f07e67edSHong Zhang   PetscInt          r,j,col,ncols,*bi,*bj;
2210f07e67edSHong Zhang   Mat               B = mat->B;
2211f07e67edSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2212c87e5d42SMatthew Knepley 
2213c87e5d42SMatthew Knepley   PetscFunctionBegin;
2214f07e67edSHong Zhang   /* When a process holds entire A and other processes have no entry */
2215f07e67edSHong Zhang   if (A->cmap->N == n) {
22169566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v,&diagA));
22179566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
22189566063dSJacob Faibussowitsch     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
22199566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
22209566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v,&diagA));
2221f07e67edSHong Zhang     PetscFunctionReturn(0);
2222f07e67edSHong Zhang   } else if (n == 0) {
2223f07e67edSHong Zhang     if (m) {
22249566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v,&a));
2225f07e67edSHong Zhang       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
22269566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v,&a));
2227f07e67edSHong Zhang     }
2228f07e67edSHong Zhang     PetscFunctionReturn(0);
2229f07e67edSHong Zhang   }
2230f07e67edSHong Zhang 
22319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
22329566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
22339566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
22349566063dSJacob Faibussowitsch   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2235f07e67edSHong Zhang 
2236f07e67edSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
22379566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2238ce496241SStefano Zampini   ba   = bav;
2239f07e67edSHong Zhang   bi   = b->i;
2240f07e67edSHong Zhang   bj   = b->j;
22419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2242f07e67edSHong Zhang   for (r = 0; r < m; r++) {
2243f07e67edSHong Zhang     ncols = bi[r+1] - bi[r];
2244f07e67edSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
2245f07e67edSHong Zhang       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2246f07e67edSHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2247f07e67edSHong Zhang       offdiagA[r] = 0.0;
2248f07e67edSHong Zhang 
2249f07e67edSHong Zhang       /* Find first hole in the cmap */
2250f07e67edSHong Zhang       for (j=0; j<ncols; j++) {
2251f07e67edSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2252f07e67edSHong Zhang         if (col > j && j < cstart) {
2253f07e67edSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2254f07e67edSHong Zhang           break;
2255f07e67edSHong Zhang         } else if (col > j + n && j >= cstart) {
2256f07e67edSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2257f07e67edSHong Zhang           break;
2258f07e67edSHong Zhang         }
2259f07e67edSHong Zhang       }
22604e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2261f07e67edSHong Zhang         /* a hole is outside compressed Bcols */
2262f07e67edSHong Zhang         if (ncols == 0) {
2263f07e67edSHong Zhang           if (cstart) {
2264f07e67edSHong Zhang             offdiagIdx[r] = 0;
2265f07e67edSHong Zhang           } else offdiagIdx[r] = cend;
2266f07e67edSHong Zhang         } else { /* ncols > 0 */
2267f07e67edSHong Zhang           offdiagIdx[r] = cmap[ncols-1] + 1;
2268f07e67edSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2269f07e67edSHong Zhang         }
2270f07e67edSHong Zhang       }
2271f07e67edSHong Zhang     }
2272f07e67edSHong Zhang 
2273f07e67edSHong Zhang     for (j=0; j<ncols; j++) {
2274f07e67edSHong Zhang       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2275f07e67edSHong Zhang       ba++; bj++;
2276f07e67edSHong Zhang     }
2277f07e67edSHong Zhang   }
2278f07e67edSHong Zhang 
22799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
22809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2281f07e67edSHong Zhang   for (r = 0; r < m; ++r) {
2282f07e67edSHong Zhang     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2283f07e67edSHong Zhang       a[r]   = diagA[r];
2284f07e67edSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2285f07e67edSHong Zhang     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2286f07e67edSHong Zhang       a[r] = diagA[r];
2287c87e5d42SMatthew Knepley       if (idx) {
2288f07e67edSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2289f07e67edSHong Zhang           idx[r] = cstart + diagIdx[r];
2290f07e67edSHong Zhang         } else idx[r] = offdiagIdx[r];
2291f07e67edSHong Zhang       }
2292f07e67edSHong Zhang     } else {
2293f07e67edSHong Zhang       a[r]   = offdiagA[r];
2294f07e67edSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2295c87e5d42SMatthew Knepley     }
2296c87e5d42SMatthew Knepley   }
22979566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
22989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
22999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
23009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
23019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
23029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
23039566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2304c87e5d42SMatthew Knepley   PetscFunctionReturn(0);
2305c87e5d42SMatthew Knepley }
2306c87e5d42SMatthew Knepley 
230703bc72f1SMatthew Knepley PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
230803bc72f1SMatthew Knepley {
230903bc72f1SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2310fa213d2fSHong Zhang   PetscInt          m = A->rmap->n,n = A->cmap->n;
2311fa213d2fSHong Zhang   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
231203bc72f1SMatthew Knepley   PetscInt          *cmap  = mat->garray;
231303bc72f1SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
231403bc72f1SMatthew Knepley   Vec               diagV, offdiagV;
2315ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2316ce496241SStefano Zampini   const PetscScalar *ba,*bav;
2317fa213d2fSHong Zhang   PetscInt          r,j,col,ncols,*bi,*bj;
2318fa213d2fSHong Zhang   Mat               B = mat->B;
2319fa213d2fSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
232003bc72f1SMatthew Knepley 
232103bc72f1SMatthew Knepley   PetscFunctionBegin;
2322fa213d2fSHong Zhang   /* When a process holds entire A and other processes have no entry */
2323fa213d2fSHong Zhang   if (A->cmap->N == n) {
23249566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v,&diagA));
23259566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
23269566063dSJacob Faibussowitsch     PetscCall(MatGetRowMin(mat->A,diagV,idx));
23279566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
23289566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v,&diagA));
2329fa213d2fSHong Zhang     PetscFunctionReturn(0);
2330fa213d2fSHong Zhang   } else if (n == 0) {
2331fa213d2fSHong Zhang     if (m) {
23329566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v,&a));
2333fa213d2fSHong Zhang       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
23349566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v,&a));
2335fa213d2fSHong Zhang     }
2336fa213d2fSHong Zhang     PetscFunctionReturn(0);
2337fa213d2fSHong Zhang   }
2338fa213d2fSHong Zhang 
23399566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
23409566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
23419566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
23429566063dSJacob Faibussowitsch   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2343fa213d2fSHong Zhang 
2344fa213d2fSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
23459566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2346ce496241SStefano Zampini   ba   = bav;
2347fa213d2fSHong Zhang   bi   = b->i;
2348fa213d2fSHong Zhang   bj   = b->j;
23499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2350fa213d2fSHong Zhang   for (r = 0; r < m; r++) {
2351fa213d2fSHong Zhang     ncols = bi[r+1] - bi[r];
2352fa213d2fSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
2353fa213d2fSHong Zhang       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2354fa213d2fSHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2355fa213d2fSHong Zhang       offdiagA[r] = 0.0;
2356fa213d2fSHong Zhang 
2357fa213d2fSHong Zhang       /* Find first hole in the cmap */
2358fa213d2fSHong Zhang       for (j=0; j<ncols; j++) {
2359fa213d2fSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2360fa213d2fSHong Zhang         if (col > j && j < cstart) {
2361fa213d2fSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2362fa213d2fSHong Zhang           break;
2363fa213d2fSHong Zhang         } else if (col > j + n && j >= cstart) {
2364fa213d2fSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2365fa213d2fSHong Zhang           break;
2366fa213d2fSHong Zhang         }
2367fa213d2fSHong Zhang       }
23684e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2369fa213d2fSHong Zhang         /* a hole is outside compressed Bcols */
2370fa213d2fSHong Zhang         if (ncols == 0) {
2371fa213d2fSHong Zhang           if (cstart) {
2372fa213d2fSHong Zhang             offdiagIdx[r] = 0;
2373fa213d2fSHong Zhang           } else offdiagIdx[r] = cend;
2374fa213d2fSHong Zhang         } else { /* ncols > 0 */
2375fa213d2fSHong Zhang           offdiagIdx[r] = cmap[ncols-1] + 1;
2376fa213d2fSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2377fa213d2fSHong Zhang         }
2378fa213d2fSHong Zhang       }
2379fa213d2fSHong Zhang     }
2380fa213d2fSHong Zhang 
2381fa213d2fSHong Zhang     for (j=0; j<ncols; j++) {
2382fa213d2fSHong Zhang       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2383fa213d2fSHong Zhang       ba++; bj++;
2384fa213d2fSHong Zhang     }
2385fa213d2fSHong Zhang   }
2386fa213d2fSHong Zhang 
23879566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
23889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2389fa213d2fSHong Zhang   for (r = 0; r < m; ++r) {
2390fa213d2fSHong Zhang     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
239103bc72f1SMatthew Knepley       a[r]   = diagA[r];
2392fa213d2fSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2393fa213d2fSHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2394fa213d2fSHong Zhang       a[r] = diagA[r];
2395fa213d2fSHong Zhang       if (idx) {
2396fa213d2fSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
239703bc72f1SMatthew Knepley           idx[r] = cstart + diagIdx[r];
2398fa213d2fSHong Zhang         } else idx[r] = offdiagIdx[r];
2399fa213d2fSHong Zhang       }
240003bc72f1SMatthew Knepley     } else {
240103bc72f1SMatthew Knepley       a[r]   = offdiagA[r];
2402fa213d2fSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
240303bc72f1SMatthew Knepley     }
240403bc72f1SMatthew Knepley   }
24059566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
24069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
24079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
24089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
24099566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
24109566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
24119566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
241203bc72f1SMatthew Knepley   PetscFunctionReturn(0);
241303bc72f1SMatthew Knepley }
241403bc72f1SMatthew Knepley 
/*
   MatGetRowMax_MPIAIJ - For every locally owned row, finds the entry with the largest real part and,
   optionally, the global column where it occurs.

   Input Parameter:
.  A - the MPIAIJ matrix

   Output Parameters:
+  v   - receives one value per local row (m = A->rmap->n values, written through VecGetArrayWrite())
-  idx - if not NULL, receives the global column index of the reported value for each local row

   Notes:
   The diagonal block mat->A is handled by MatGetRowMax(); the off-diagonal block mat->B is scanned by hand,
   using mat->garray to translate compressed B columns into global columns.
   A B row that does not store all A->cmap->N - n off-process columns starts from the value 0.0, located at
   the first global column outside the diagonal block that the row does not store.
   Values are compared with PetscRealPart(); when the diagonal and off-diagonal candidates tie, the smaller
   global column index is reported.
*/
2415c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2416c87e5d42SMatthew Knepley {
2417c87e5d42SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
24181a254869SHong Zhang   PetscInt          m = A->rmap->n,n = A->cmap->n;
24191a254869SHong Zhang   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2420c87e5d42SMatthew Knepley   PetscInt          *cmap  = mat->garray;
2421c87e5d42SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
2422c87e5d42SMatthew Knepley   Vec               diagV, offdiagV;
2423ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2424ce496241SStefano Zampini   const PetscScalar *ba,*bav;
24251a254869SHong Zhang   PetscInt          r,j,col,ncols,*bi,*bj;
24261a254869SHong Zhang   Mat               B = mat->B;
24271a254869SHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2428c87e5d42SMatthew Knepley 
2429c87e5d42SMatthew Knepley   PetscFunctionBegin;
24301a254869SHong Zhang   /* When a process holds entire A and other processes have no entry */
24311a254869SHong Zhang   if (A->cmap->N == n) {
/* All columns are local: wrap v's storage in a sequential Vec so MatGetRowMax() on mat->A writes straight into v */
24329566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v,&diagA));
24339566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
24349566063dSJacob Faibussowitsch     PetscCall(MatGetRowMax(mat->A,diagV,idx));
24359566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
24369566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v,&diagA));
24371a254869SHong Zhang     PetscFunctionReturn(0);
24381a254869SHong Zhang   } else if (n == 0) {
/* No local columns: every local row reports PETSC_MIN_REAL at column -1 and mat->B is never inspected.
   NOTE(review): this branch is reached with A->cmap->N != 0, so rows could still hold entries in mat->B;
   confirm that ignoring them is intended. */
24391a254869SHong Zhang     if (m) {
24409566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v,&a));
24411a254869SHong Zhang       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
24429566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v,&a));
24431a254869SHong Zhang     }
24441a254869SHong Zhang     PetscFunctionReturn(0);
24451a254869SHong Zhang   }
24461a254869SHong Zhang 
/* General case: row maxima of the diagonal block go to diagV/diagIdx (diagIdx holds columns local to mat->A) */
24479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
24489566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
24499566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
24509566063dSJacob Faibussowitsch   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
24511a254869SHong Zhang 
24521a254869SHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
24539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
/* ba and bj are advanced one stored entry at a time by the scan loop at the end of each row iteration,
   so at the top of every iteration they point at the first stored entry of row r; bav keeps the base
   pointer for the matching restore call */
2454ce496241SStefano Zampini   ba   = bav;
24551a254869SHong Zhang   bi   = b->i;
24561a254869SHong Zhang   bj   = b->j;
24579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
24581a254869SHong Zhang   for (r = 0; r < m; r++) {
24591a254869SHong Zhang     ncols = bi[r+1] - bi[r];
24601a254869SHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
24611a254869SHong Zhang       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
24621a254869SHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
24631a254869SHong Zhang       offdiagA[r] = 0.0;
2464 1a254869SHong Zhang 
24651a254869SHong Zhang       /* Find first hole in the cmap */
/* If the row stored every off-process column up to its j-th entry, that entry would sit at global column j
   (when j < cstart) or j + n (when j >= cstart); a larger column means the expected column is not stored */
24661a254869SHong Zhang       for (j=0; j<ncols; j++) {
24671a254869SHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
24681a254869SHong Zhang         if (col > j && j < cstart) {
24691a254869SHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
24701a254869SHong Zhang           break;
24711a254869SHong Zhang         } else if (col > j + n && j >= cstart) {
24721a254869SHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
24731a254869SHong Zhang           break;
24741a254869SHong Zhang         }
24751a254869SHong Zhang       }
24764e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
24771a254869SHong Zhang         /* a hole is outside compressed Bcols */
24781a254869SHong Zhang         if (ncols == 0) {
/* Empty row: the first column outside the diagonal block is 0, or cend when the block starts at column 0 */
24791a254869SHong Zhang           if (cstart) {
24801a254869SHong Zhang             offdiagIdx[r] = 0;
24811a254869SHong Zhang           } else offdiagIdx[r] = cend;
24821a254869SHong Zhang         } else { /* ncols > 0 */
/* Take the column right after cmap[ncols-1]; if that lands on cstart, step over the n diagonal-block columns */
24831a254869SHong Zhang           offdiagIdx[r] = cmap[ncols-1] + 1;
24841a254869SHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
24851a254869SHong Zhang         }
24861a254869SHong Zhang       }
24871a254869SHong Zhang     }
2488 1a254869SHong Zhang 
/* Scan the stored entries of the row; this is also what moves ba/bj forward to the next row.
   The strict '<' keeps the earlier candidate on ties. */
24891a254869SHong Zhang     for (j=0; j<ncols; j++) {
24901a254869SHong Zhang       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
24911a254869SHong Zhang       ba++; bj++;
24921a254869SHong Zhang     }
24931a254869SHong Zhang   }
2494 1a254869SHong Zhang 
/* Merge the two candidates per row; diagIdx is shifted by cstart to become a global column index */
24959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v,    &a));
24969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
24971a254869SHong Zhang   for (r = 0; r < m; ++r) {
24981a254869SHong Zhang     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2499c87e5d42SMatthew Knepley       a[r] = diagA[r];
25001a254869SHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
25011a254869SHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
/* Tie: report the diagonal value and the smaller of the two global column indices */
25021a254869SHong Zhang       a[r] = diagA[r];
25031a254869SHong Zhang       if (idx) {
25041a254869SHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2505c87e5d42SMatthew Knepley           idx[r] = cstart + diagIdx[r];
25061a254869SHong Zhang         } else idx[r] = offdiagIdx[r];
25071a254869SHong Zhang       }
2508c87e5d42SMatthew Knepley     } else {
2509c87e5d42SMatthew Knepley       a[r] = offdiagA[r];
25101a254869SHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2511c87e5d42SMatthew Knepley     }
2512c87e5d42SMatthew Knepley   }
25139566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
25149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v,       &a));
25159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
25169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
25179566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
25189566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
25199566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2520c87e5d42SMatthew Knepley   PetscFunctionReturn(0);
2521c87e5d42SMatthew Knepley }
2522c87e5d42SMatthew Knepley 
2523d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
25245494a064SHong Zhang {
2525f6d58c54SBarry Smith   Mat            *dummy;
25265494a064SHong Zhang 
25275494a064SHong Zhang   PetscFunctionBegin;
25289566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2529f6d58c54SBarry Smith   *newmat = *dummy;
25309566063dSJacob Faibussowitsch   PetscCall(PetscFree(dummy));
25315494a064SHong Zhang   PetscFunctionReturn(0);
25325494a064SHong Zhang }
25335494a064SHong Zhang 
2534713ccfa9SJed Brown PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2535bbead8a2SBarry Smith {
2536bbead8a2SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2537bbead8a2SBarry Smith 
2538bbead8a2SBarry Smith   PetscFunctionBegin;
25399566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(a->A,values));
25407b6c816cSBarry Smith   A->factorerrortype = a->A->factorerrortype;
2541bbead8a2SBarry Smith   PetscFunctionReturn(0);
2542bbead8a2SBarry Smith }
2543bbead8a2SBarry Smith 
254473a71a0fSBarry Smith static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
254573a71a0fSBarry Smith {
254673a71a0fSBarry Smith   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
254773a71a0fSBarry Smith 
254873a71a0fSBarry Smith   PetscFunctionBegin;
254908401ef6SPierre Jolivet   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
25509566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(aij->A,rctx));
2551679944adSJunchao Zhang   if (x->assembled) {
25529566063dSJacob Faibussowitsch     PetscCall(MatSetRandom(aij->B,rctx));
2553679944adSJunchao Zhang   } else {
25549566063dSJacob Faibussowitsch     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2555679944adSJunchao Zhang   }
25569566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
25579566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
255873a71a0fSBarry Smith   PetscFunctionReturn(0);
255973a71a0fSBarry Smith }
2560bbead8a2SBarry Smith 
2561b1b1104fSBarry Smith PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2562b1b1104fSBarry Smith {
2563b1b1104fSBarry Smith   PetscFunctionBegin;
2564b1b1104fSBarry Smith   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2565b1b1104fSBarry Smith   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2566b1b1104fSBarry Smith   PetscFunctionReturn(0);
2567b1b1104fSBarry Smith }
2568b1b1104fSBarry Smith 
2569b1b1104fSBarry Smith /*@
2570b1b1104fSBarry Smith    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2571b1b1104fSBarry Smith 
2572b1b1104fSBarry Smith    Collective on Mat
2573b1b1104fSBarry Smith 
2574b1b1104fSBarry Smith    Input Parameters:
2575b1b1104fSBarry Smith +    A - the matrix
2576b1b1104fSBarry Smith -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2577b1b1104fSBarry Smith 
257896a0c994SBarry Smith  Level: advanced
257996a0c994SBarry Smith 
2580b1b1104fSBarry Smith @*/
2581b1b1104fSBarry Smith PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2582b1b1104fSBarry Smith {
2583b1b1104fSBarry Smith   PetscFunctionBegin;
2584cac4c232SBarry Smith   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2585b1b1104fSBarry Smith   PetscFunctionReturn(0);
2586b1b1104fSBarry Smith }
2587b1b1104fSBarry Smith 
25884416b707SBarry Smith PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2589b1b1104fSBarry Smith {
2590b1b1104fSBarry Smith   PetscBool            sc = PETSC_FALSE,flg;
2591b1b1104fSBarry Smith 
2592b1b1104fSBarry Smith   PetscFunctionBegin;
2593d0609cedSBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2594b1b1104fSBarry Smith   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
25959566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2596b1b1104fSBarry Smith   if (flg) {
25979566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2598b1b1104fSBarry Smith   }
2599d0609cedSBarry Smith   PetscOptionsHeadEnd();
2600b1b1104fSBarry Smith   PetscFunctionReturn(0);
2601b1b1104fSBarry Smith }
2602b1b1104fSBarry Smith 
26037d68702bSBarry Smith PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
26047d68702bSBarry Smith {
26057d68702bSBarry Smith   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2606c5e4d11fSDmitry Karpeev   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
26077d68702bSBarry Smith 
26087d68702bSBarry Smith   PetscFunctionBegin;
2609c5e4d11fSDmitry Karpeev   if (!Y->preallocated) {
26109566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2611c5e4d11fSDmitry Karpeev   } else if (!aij->nz) {
2612b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
26139566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2614b83222d8SBarry Smith     aij->nonew = nonew;
26157d68702bSBarry Smith   }
26169566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y,a));
26177d68702bSBarry Smith   PetscFunctionReturn(0);
26187d68702bSBarry Smith }
26197d68702bSBarry Smith 
26203b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
26213b49f96aSBarry Smith {
26223b49f96aSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
26233b49f96aSBarry Smith 
26243b49f96aSBarry Smith   PetscFunctionBegin;
262508401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
26269566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A,missing,d));
26273b49f96aSBarry Smith   if (d) {
26283b49f96aSBarry Smith     PetscInt rstart;
26299566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
26303b49f96aSBarry Smith     *d += rstart;
26313b49f96aSBarry Smith 
26323b49f96aSBarry Smith   }
26333b49f96aSBarry Smith   PetscFunctionReturn(0);
26343b49f96aSBarry Smith }
26353b49f96aSBarry Smith 
2636a8ee9fb5SBarry Smith PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2637a8ee9fb5SBarry Smith {
2638a8ee9fb5SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2639a8ee9fb5SBarry Smith 
2640a8ee9fb5SBarry Smith   PetscFunctionBegin;
26419566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2642a8ee9fb5SBarry Smith   PetscFunctionReturn(0);
2643a8ee9fb5SBarry Smith }
26443b49f96aSBarry Smith 
26458a729477SBarry Smith /* -------------------------------------------------------------------*/
/*
   MatOps_Values - static instance of struct _MatOps holding the MPIAIJ implementations.

   The initializer is positional: what an entry means is determined only by where it stands in the list.
   The numeric comments give the zero-based position of the entry on the same line.  A NULL entry leaves
   that slot without a function in this table.  Inserting or deleting an entry shifts every entry after
   it, so changes must keep the positions aligned with the declaration of struct _MatOps.
*/
2646cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2647cda55fadSBarry Smith                                        MatGetRow_MPIAIJ,
2648cda55fadSBarry Smith                                        MatRestoreRow_MPIAIJ,
2649cda55fadSBarry Smith                                        MatMult_MPIAIJ,
265097304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIAIJ,
26517c922b88SBarry Smith                                        MatMultTranspose_MPIAIJ,
26527c922b88SBarry Smith                                        MatMultTransposeAdd_MPIAIJ,
2653f4259b30SLisandro Dalcin                                        NULL,
2654f4259b30SLisandro Dalcin                                        NULL,
2655f4259b30SLisandro Dalcin                                        NULL,
2656f4259b30SLisandro Dalcin                                 /*10*/ NULL,
2657f4259b30SLisandro Dalcin                                        NULL,
2658f4259b30SLisandro Dalcin                                        NULL,
265941f059aeSBarry Smith                                        MatSOR_MPIAIJ,
2660b7c46309SBarry Smith                                        MatTranspose_MPIAIJ,
266197304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIAIJ,
2662cda55fadSBarry Smith                                        MatEqual_MPIAIJ,
2663cda55fadSBarry Smith                                        MatGetDiagonal_MPIAIJ,
2664cda55fadSBarry Smith                                        MatDiagonalScale_MPIAIJ,
2665cda55fadSBarry Smith                                        MatNorm_MPIAIJ,
266697304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2667cda55fadSBarry Smith                                        MatAssemblyEnd_MPIAIJ,
2668cda55fadSBarry Smith                                        MatSetOption_MPIAIJ,
2669cda55fadSBarry Smith                                        MatZeroEntries_MPIAIJ,
2670d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIAIJ,
2671f4259b30SLisandro Dalcin                                        NULL,
2672f4259b30SLisandro Dalcin                                        NULL,
2673f4259b30SLisandro Dalcin                                        NULL,
2674f4259b30SLisandro Dalcin                                        NULL,
26754994cf47SJed Brown                                 /*29*/ MatSetUp_MPIAIJ,
2676f4259b30SLisandro Dalcin                                        NULL,
2677f4259b30SLisandro Dalcin                                        NULL,
2678a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIAIJ,
2679f4259b30SLisandro Dalcin                                        NULL,
2680d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIAIJ,
2681f4259b30SLisandro Dalcin                                        NULL,
2682f4259b30SLisandro Dalcin                                        NULL,
2683f4259b30SLisandro Dalcin                                        NULL,
2684f4259b30SLisandro Dalcin                                        NULL,
2685d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIAIJ,
26867dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIAIJ,
2687cda55fadSBarry Smith                                        MatIncreaseOverlap_MPIAIJ,
2688cda55fadSBarry Smith                                        MatGetValues_MPIAIJ,
2689cb5b572fSBarry Smith                                        MatCopy_MPIAIJ,
2690d519adbfSMatthew Knepley                                 /*44*/ MatGetRowMax_MPIAIJ,
2691cda55fadSBarry Smith                                        MatScale_MPIAIJ,
26927d68702bSBarry Smith                                        MatShift_MPIAIJ,
269399e65526SBarry Smith                                        MatDiagonalSet_MPIAIJ,
2694564f14d6SBarry Smith                                        MatZeroRowsColumns_MPIAIJ,
269573a71a0fSBarry Smith                                 /*49*/ MatSetRandom_MPIAIJ,
2696f4259b30SLisandro Dalcin                                        NULL,
2697f4259b30SLisandro Dalcin                                        NULL,
2698f4259b30SLisandro Dalcin                                        NULL,
2699f4259b30SLisandro Dalcin                                        NULL,
270093dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2701f4259b30SLisandro Dalcin                                        NULL,
2702cda55fadSBarry Smith                                        MatSetUnfactored_MPIAIJ,
270372e6a0cfSJed Brown                                        MatPermute_MPIAIJ,
2704f4259b30SLisandro Dalcin                                        NULL,
27057dae84e0SHong Zhang                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2706e03a110bSBarry Smith                                        MatDestroy_MPIAIJ,
2707e03a110bSBarry Smith                                        MatView_MPIAIJ,
2708f4259b30SLisandro Dalcin                                        NULL,
2709f4259b30SLisandro Dalcin                                        NULL,
2710f4259b30SLisandro Dalcin                                 /*64*/ NULL,
2711f996eeb8SHong Zhang                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2712f4259b30SLisandro Dalcin                                        NULL,
2713f4259b30SLisandro Dalcin                                        NULL,
2714f4259b30SLisandro Dalcin                                        NULL,
2715d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2716c87e5d42SMatthew Knepley                                        MatGetRowMinAbs_MPIAIJ,
2717f4259b30SLisandro Dalcin                                        NULL,
2718f4259b30SLisandro Dalcin                                        NULL,
2719f4259b30SLisandro Dalcin                                        NULL,
2720f4259b30SLisandro Dalcin                                        NULL,
27213acb8795SBarry Smith                                 /*75*/ MatFDColoringApply_AIJ,
2722b1b1104fSBarry Smith                                        MatSetFromOptions_MPIAIJ,
2723f4259b30SLisandro Dalcin                                        NULL,
2724f4259b30SLisandro Dalcin                                        NULL,
2725f1f41ecbSJed Brown                                        MatFindZeroDiagonals_MPIAIJ,
2726f4259b30SLisandro Dalcin                                 /*80*/ NULL,
2727f4259b30SLisandro Dalcin                                        NULL,
2728f4259b30SLisandro Dalcin                                        NULL,
27295bba2384SShri Abhyankar                                 /*83*/ MatLoad_MPIAIJ,
2730a3bbdb47SHong Zhang                                        MatIsSymmetric_MPIAIJ,
2731f4259b30SLisandro Dalcin                                        NULL,
2732f4259b30SLisandro Dalcin                                        NULL,
2733f4259b30SLisandro Dalcin                                        NULL,
2734f4259b30SLisandro Dalcin                                        NULL,
2735f4259b30SLisandro Dalcin                                 /*89*/ NULL,
2736f4259b30SLisandro Dalcin                                        NULL,
273726be0446SHong Zhang                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2738f4259b30SLisandro Dalcin                                        NULL,
2739f4259b30SLisandro Dalcin                                        NULL,
2740cf3ca8ceSHong Zhang                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2741f4259b30SLisandro Dalcin                                        NULL,
2742f4259b30SLisandro Dalcin                                        NULL,
2743f4259b30SLisandro Dalcin                                        NULL,
2744b470e4b4SRichard Tran Mills                                        MatBindToCPU_MPIAIJ,
27454222ddf1SHong Zhang                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2746f4259b30SLisandro Dalcin                                        NULL,
2747f4259b30SLisandro Dalcin                                        NULL,
27482fd7e33dSBarry Smith                                        MatConjugate_MPIAIJ,
2749f4259b30SLisandro Dalcin                                        NULL,
2750d519adbfSMatthew Knepley                                 /*104*/MatSetValuesRow_MPIAIJ,
275199cafbc1SBarry Smith                                        MatRealPart_MPIAIJ,
275269db28dcSHong Zhang                                        MatImaginaryPart_MPIAIJ,
2753f4259b30SLisandro Dalcin                                        NULL,
2754f4259b30SLisandro Dalcin                                        NULL,
2755f4259b30SLisandro Dalcin                                 /*109*/NULL,
2756f4259b30SLisandro Dalcin                                        NULL,
27575494a064SHong Zhang                                        MatGetRowMin_MPIAIJ,
2758f4259b30SLisandro Dalcin                                        NULL,
27593b49f96aSBarry Smith                                        MatMissingDiagonal_MPIAIJ,
2760d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2761f4259b30SLisandro Dalcin                                        NULL,
2762c5e4d11fSDmitry Karpeev                                        MatGetGhosts_MPIAIJ,
2763f4259b30SLisandro Dalcin                                        NULL,
2764f4259b30SLisandro Dalcin                                        NULL,
2765b215bc84SStefano Zampini                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2766f4259b30SLisandro Dalcin                                        NULL,
2767f4259b30SLisandro Dalcin                                        NULL,
2768f4259b30SLisandro Dalcin                                        NULL,
2769b9614d88SDmitry Karpeev                                        MatGetMultiProcBlock_MPIAIJ,
2770f2c98031SJed Brown                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2771a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIAIJ,
2772bbead8a2SBarry Smith                                        MatInvertBlockDiagonal_MPIAIJ,
2773a8ee9fb5SBarry Smith                                        MatInvertVariableBlockDiagonal_MPIAIJ,
27747dae84e0SHong Zhang                                        MatCreateSubMatricesMPI_MPIAIJ,
2775f4259b30SLisandro Dalcin                                 /*129*/NULL,
2776f4259b30SLisandro Dalcin                                        NULL,
2777f4259b30SLisandro Dalcin                                        NULL,
2778187b3c17SHong Zhang                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2779f4259b30SLisandro Dalcin                                        NULL,
2780f4259b30SLisandro Dalcin                                 /*134*/NULL,
2781f4259b30SLisandro Dalcin                                        NULL,
2782f4259b30SLisandro Dalcin                                        NULL,
2783f4259b30SLisandro Dalcin                                        NULL,
2784f4259b30SLisandro Dalcin                                        NULL,
278546533700Sstefano_zampini                                 /*139*/MatSetBlockSizes_MPIAIJ,
2786f4259b30SLisandro Dalcin                                        NULL,
2787f4259b30SLisandro Dalcin                                        NULL,
27889c8f2541SHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2789a0b6529bSBarry Smith                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
27904222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2791f4259b30SLisandro Dalcin                                 /*145*/NULL,
2792f4259b30SLisandro Dalcin                                        NULL,
2793f4259b30SLisandro Dalcin                                        NULL
2794bd0c2dcbSBarry Smith };
279536ce4990SBarry Smith 
27962e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/
27972e8a6d31SBarry Smith 
27987087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
27992e8a6d31SBarry Smith {
28002e8a6d31SBarry Smith   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
28012e8a6d31SBarry Smith 
28022e8a6d31SBarry Smith   PetscFunctionBegin;
28039566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
28049566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
28052e8a6d31SBarry Smith   PetscFunctionReturn(0);
28062e8a6d31SBarry Smith }
28072e8a6d31SBarry Smith 
28087087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
28092e8a6d31SBarry Smith {
28102e8a6d31SBarry Smith   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
28112e8a6d31SBarry Smith 
28122e8a6d31SBarry Smith   PetscFunctionBegin;
28139566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
28149566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
28152e8a6d31SBarry Smith   PetscFunctionReturn(0);
28162e8a6d31SBarry Smith }
28178a729477SBarry Smith 
28187087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2819a23d5eceSKris Buschelman {
2820a23d5eceSKris Buschelman   Mat_MPIAIJ     *b;
28215d2a9ed1SStefano Zampini   PetscMPIInt    size;
2822a23d5eceSKris Buschelman 
2823a23d5eceSKris Buschelman   PetscFunctionBegin;
28249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
28259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2826a23d5eceSKris Buschelman   b = (Mat_MPIAIJ*)B->data;
2827899cda47SBarry Smith 
2828cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
28299566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
2830cb7b82ddSBarry Smith #else
28319566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2832cb7b82ddSBarry Smith #endif
28339566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
28349566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
28359566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2836cb7b82ddSBarry Smith 
2837cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
28389566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
28399566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
28409566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
28419566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
28429566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
28439566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B,MATSEQAIJ));
28449566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2845cb7b82ddSBarry Smith 
2846cb7b82ddSBarry Smith   if (!B->preallocated) {
28479566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
28489566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
28499566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
28509566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A,MATSEQAIJ));
28519566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2852526dfc15SBarry Smith   }
2853899cda47SBarry Smith 
28549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
28559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2856526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2857cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
285815001458SStefano Zampini   B->assembled     = PETSC_FALSE;
2859a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2860a23d5eceSKris Buschelman }
2861a23d5eceSKris Buschelman 
2862846b4da1SFande Kong PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2863846b4da1SFande Kong {
2864846b4da1SFande Kong   Mat_MPIAIJ     *b;
2865846b4da1SFande Kong 
2866846b4da1SFande Kong   PetscFunctionBegin;
2867846b4da1SFande Kong   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
28689566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
28699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2870846b4da1SFande Kong   b = (Mat_MPIAIJ*)B->data;
2871846b4da1SFande Kong 
2872846b4da1SFande Kong #if defined(PETSC_USE_CTABLE)
28739566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
2874846b4da1SFande Kong #else
28759566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2876846b4da1SFande Kong #endif
28779566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
28789566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
28799566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2880846b4da1SFande Kong 
28819566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->A));
28829566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->B));
2883846b4da1SFande Kong   B->preallocated  = PETSC_TRUE;
2884846b4da1SFande Kong   B->was_assembled = PETSC_FALSE;
2885846b4da1SFande Kong   B->assembled = PETSC_FALSE;
2886846b4da1SFande Kong   PetscFunctionReturn(0);
2887846b4da1SFande Kong }
2888846b4da1SFande Kong 
2889dfbe8321SBarry Smith PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2890d6dfbf8fSBarry Smith {
2891d6dfbf8fSBarry Smith   Mat            mat;
2892416022c9SBarry Smith   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2893d6dfbf8fSBarry Smith 
28943a40ed3dSBarry Smith   PetscFunctionBegin;
2895f4259b30SLisandro Dalcin   *newmat = NULL;
28969566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
28979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
28989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
28999566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2900273d9f13SBarry Smith   a       = (Mat_MPIAIJ*)mat->data;
2901e1b6402fSHong Zhang 
2902d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2903501880eeSStefano Zampini   mat->assembled    = matin->assembled;
2904e7641de0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2905501880eeSStefano Zampini   mat->preallocated = matin->preallocated;
2906d6dfbf8fSBarry Smith 
290717699dbbSLois Curfman McInnes   a->size         = oldmat->size;
290817699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
2909e7641de0SSatish Balay   a->donotstash   = oldmat->donotstash;
2910e7641de0SSatish Balay   a->roworiented  = oldmat->roworiented;
2911501880eeSStefano Zampini   a->rowindices   = NULL;
2912501880eeSStefano Zampini   a->rowvalues    = NULL;
2913bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
2914d6dfbf8fSBarry Smith 
29159566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
29169566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2917899cda47SBarry Smith 
29182ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
2919aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
29209566063dSJacob Faibussowitsch     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2921b1fc9764SSatish Balay #else
29229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
29239566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
29249566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2925b1fc9764SSatish Balay #endif
2926501880eeSStefano Zampini   } else a->colmap = NULL;
29273f41c07dSBarry Smith   if (oldmat->garray) {
2928b1d57f15SBarry Smith     PetscInt len;
2929d0f46423SBarry Smith     len  = oldmat->B->cmap->n;
29309566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len+1,&a->garray));
29319566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
29329566063dSJacob Faibussowitsch     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2933501880eeSStefano Zampini   } else a->garray = NULL;
2934d6dfbf8fSBarry Smith 
29350de76c62SStefano Zampini   /* It may happen MatDuplicate is called with a non-assembled matrix
29360de76c62SStefano Zampini      In fact, MatDuplicate only requires the matrix to be preallocated
29370de76c62SStefano Zampini      This may happen inside a DMCreateMatrix_Shell */
29380de76c62SStefano Zampini   if (oldmat->lvec) {
29399566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
29409566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
29410de76c62SStefano Zampini   }
29420de76c62SStefano Zampini   if (oldmat->Mvctx) {
29439566063dSJacob Faibussowitsch     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
29449566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
29450de76c62SStefano Zampini   }
29469566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
29479566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
29489566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
29499566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
29509566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
29518a729477SBarry Smith   *newmat = mat;
29523a40ed3dSBarry Smith   PetscFunctionReturn(0);
29538a729477SBarry Smith }
2954416022c9SBarry Smith 
2955112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
29568fb81238SShri Abhyankar {
295752f91c60SVaclav Hapla   PetscBool      isbinary, ishdf5;
295852f91c60SVaclav Hapla 
295952f91c60SVaclav Hapla   PetscFunctionBegin;
296052f91c60SVaclav Hapla   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
296152f91c60SVaclav Hapla   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2962c27b3999SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
29639566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
29649566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
29659566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
296652f91c60SVaclav Hapla   if (isbinary) {
29679566063dSJacob Faibussowitsch     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
296852f91c60SVaclav Hapla   } else if (ishdf5) {
296952f91c60SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
29709566063dSJacob Faibussowitsch     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
297152f91c60SVaclav Hapla #else
297252f91c60SVaclav Hapla     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
297352f91c60SVaclav Hapla #endif
297452f91c60SVaclav Hapla   } else {
297598921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
297652f91c60SVaclav Hapla   }
297752f91c60SVaclav Hapla   PetscFunctionReturn(0);
297852f91c60SVaclav Hapla }
297952f91c60SVaclav Hapla 
29803ea6fe3dSLisandro Dalcin PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
298152f91c60SVaclav Hapla {
29823ea6fe3dSLisandro Dalcin   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
29833ea6fe3dSLisandro Dalcin   PetscInt       *rowidxs,*colidxs;
29843ea6fe3dSLisandro Dalcin   PetscScalar    *matvals;
29858fb81238SShri Abhyankar 
29868fb81238SShri Abhyankar   PetscFunctionBegin;
29879566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
29888fb81238SShri Abhyankar 
29893ea6fe3dSLisandro Dalcin   /* read in matrix header */
29909566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
299108401ef6SPierre Jolivet   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
29923ea6fe3dSLisandro Dalcin   M  = header[1]; N = header[2]; nz = header[3];
299308401ef6SPierre Jolivet   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
299408401ef6SPierre Jolivet   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
299508401ef6SPierre Jolivet   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
299608ea439dSMark F. Adams 
29973ea6fe3dSLisandro Dalcin   /* set block sizes from the viewer's .info file */
29989566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
29993ea6fe3dSLisandro Dalcin   /* set global sizes if not set already */
30003ea6fe3dSLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
30013ea6fe3dSLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
30029566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
30039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
30048fb81238SShri Abhyankar 
30053ea6fe3dSLisandro Dalcin   /* check if the matrix sizes are correct */
30069566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat,&rows,&cols));
3007*aed4548fSBarry Smith   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
30088fb81238SShri Abhyankar 
30093ea6fe3dSLisandro Dalcin   /* read in row lengths and build row indices */
30109566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat,&m,NULL));
30119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m+1,&rowidxs));
30129566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
30133ea6fe3dSLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
30141c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
301508401ef6SPierre Jolivet   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
30163ea6fe3dSLisandro Dalcin   /* read in column indices and matrix values */
30179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
30189566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
30199566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
30203ea6fe3dSLisandro Dalcin   /* store matrix indices and values */
30219566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
30229566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
30239566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs,matvals));
30248fb81238SShri Abhyankar   PetscFunctionReturn(0);
30258fb81238SShri Abhyankar }
30268fb81238SShri Abhyankar 
30273782ecc7SHong Zhang /* Not scalable because of ISAllGather() unless getting all columns. */
30288b3fa1f7SHong Zhang PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
30294aa3045dSJed Brown {
30304aa3045dSJed Brown   IS             iscol_local;
3031c5e4d11fSDmitry Karpeev   PetscBool      isstride;
3032c5e4d11fSDmitry Karpeev   PetscMPIInt    lisstride=0,gisstride;
30333782ecc7SHong Zhang 
30343782ecc7SHong Zhang   PetscFunctionBegin;
30353782ecc7SHong Zhang   /* check if we are grabbing all columns*/
30369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
30373782ecc7SHong Zhang 
3038c5e4d11fSDmitry Karpeev   if (isstride) {
3039c5e4d11fSDmitry Karpeev     PetscInt  start,len,mstart,mlen;
30409566063dSJacob Faibussowitsch     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
30419566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol,&len));
30429566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3043c5e4d11fSDmitry Karpeev     if (mstart == start && mlen-mstart == len) lisstride = 1;
3044c5e4d11fSDmitry Karpeev   }
30453782ecc7SHong Zhang 
30461c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3047c5e4d11fSDmitry Karpeev   if (gisstride) {
3048c5e4d11fSDmitry Karpeev     PetscInt N;
30499566063dSJacob Faibussowitsch     PetscCall(MatGetSize(mat,NULL,&N));
30509566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
30519566063dSJacob Faibussowitsch     PetscCall(ISSetIdentity(iscol_local));
30529566063dSJacob Faibussowitsch     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3053c5e4d11fSDmitry Karpeev   } else {
3054c5bfad50SMark F. Adams     PetscInt cbs;
30559566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol,&cbs));
30569566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol,&iscol_local));
30579566063dSJacob Faibussowitsch     PetscCall(ISSetBlockSize(iscol_local,cbs));
3058b79d0421SJed Brown   }
30593782ecc7SHong Zhang 
30603782ecc7SHong Zhang   *isseq = iscol_local;
30613782ecc7SHong Zhang   PetscFunctionReturn(0);
3062c5e4d11fSDmitry Karpeev }
30638d2139bdSHong Zhang 
3064ddfdf956SHong Zhang /*
30659c988bcaSHong Zhang  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
30669c988bcaSHong Zhang  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3067ddfdf956SHong Zhang 
3068ddfdf956SHong Zhang  Input Parameters:
3069ddfdf956SHong Zhang    mat - matrix
30709c988bcaSHong Zhang    isrow - parallel row index set; its local indices are a subset of local columns of mat,
30719c988bcaSHong Zhang            i.e., mat->rstart <= isrow[i] < mat->rend
3072ddfdf956SHong Zhang    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3073ddfdf956SHong Zhang            i.e., mat->cstart <= iscol[i] < mat->cend
3074ddfdf956SHong Zhang  Output Parameter:
30759c988bcaSHong Zhang    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
30769c988bcaSHong Zhang    iscol_o - sequential column index set for retrieving mat->B
30779c988bcaSHong Zhang    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3078ddfdf956SHong Zhang  */
30799c988bcaSHong Zhang PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
30803782ecc7SHong Zhang {
3081040216a4SHong Zhang   Vec            x,cmap;
3082040216a4SHong Zhang   const PetscInt *is_idx;
3083040216a4SHong Zhang   PetscScalar    *xarray,*cmaparray;
30849c988bcaSHong Zhang   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3085040216a4SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3086040216a4SHong Zhang   Mat            B=a->B;
3087040216a4SHong Zhang   Vec            lvec=a->lvec,lcmap;
3088a31a438cSHong Zhang   PetscInt       i,cstart,cend,Bn=B->cmap->N;
30898b3fa1f7SHong Zhang   MPI_Comm       comm;
30903a8d973cSHong Zhang   VecScatter     Mvctx=a->Mvctx;
30913782ecc7SHong Zhang 
30923782ecc7SHong Zhang   PetscFunctionBegin;
30939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
30949566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol,&ncols));
30958b3fa1f7SHong Zhang 
3096ddfdf956SHong Zhang   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
30979566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mat,&x,NULL));
30989566063dSJacob Faibussowitsch   PetscCall(VecSet(x,-1.0));
30999566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(x,&cmap));
31009566063dSJacob Faibussowitsch   PetscCall(VecSet(cmap,-1.0));
31010a351717SHong Zhang 
31029c988bcaSHong Zhang   /* Get start indices */
31039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3104ddfdf956SHong Zhang   isstart -= ncols;
31059566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3106040216a4SHong Zhang 
31079566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol,&is_idx));
31089566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x,&xarray));
31099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(cmap,&cmaparray));
31109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ncols,&idx));
3111ddfdf956SHong Zhang   for (i=0; i<ncols; i++) {
31128b3fa1f7SHong Zhang     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3113ddfdf956SHong Zhang     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
31149c988bcaSHong Zhang     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
31158b3fa1f7SHong Zhang   }
31169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x,&xarray));
31179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(cmap,&cmaparray));
31189566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol,&is_idx));
31198b3fa1f7SHong Zhang 
31209c988bcaSHong Zhang   /* Get iscol_d */
31219566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
31229566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(iscol,&i));
31239566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*iscol_d,i));
3124feb78a15SHong Zhang 
31259c988bcaSHong Zhang   /* Get isrow_d */
31269566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow,&m));
3127feb78a15SHong Zhang   rstart = mat->rmap->rstart;
31289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&idx));
31299566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow,&is_idx));
31309c988bcaSHong Zhang   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
31319566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow,&is_idx));
3132feb78a15SHong Zhang 
31339566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
31349566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(isrow,&i));
31359566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*isrow_d,i));
3136feb78a15SHong Zhang 
31379c988bcaSHong Zhang   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
31389566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
31399566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3140ddfdf956SHong Zhang 
31419566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(lvec,&lcmap));
314207250d77SHong Zhang 
31439566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
31449566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
314564efcef9SHong Zhang 
31469c988bcaSHong Zhang   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3147ddfdf956SHong Zhang   /* off-process column indices */
31489c988bcaSHong Zhang   count = 0;
31499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn,&idx));
31509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn,&cmap1));
3151feb78a15SHong Zhang 
31529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lvec,&xarray));
31539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lcmap,&cmaparray));
31548b3fa1f7SHong Zhang   for (i=0; i<Bn; i++) {
3155f73421bfSHong Zhang     if (PetscRealPart(xarray[i]) > -1.0) {
31569c988bcaSHong Zhang       idx[count]     = i;                   /* local column index in off-diagonal part B */
31571c645242SHong Zhang       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
31581c645242SHong Zhang       count++;
31598b3fa1f7SHong Zhang     }
31608b3fa1f7SHong Zhang   }
31619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lvec,&xarray));
31629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lcmap,&cmaparray));
316307250d77SHong Zhang 
31649566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3165b6d9b4e0SHong Zhang   /* cannot ensure iscol_o has same blocksize as iscol! */
3166b6d9b4e0SHong Zhang 
31679566063dSJacob Faibussowitsch   PetscCall(PetscFree(idx));
31689c988bcaSHong Zhang   *garray = cmap1;
31699c988bcaSHong Zhang 
31709566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&x));
31719566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&cmap));
31729566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lcmap));
3173040216a4SHong Zhang   PetscFunctionReturn(0);
3174040216a4SHong Zhang }
3175040216a4SHong Zhang 
3176b20e2604SHong Zhang /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
31773b00a383SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
31783b00a383SHong Zhang {
3179b20e2604SHong Zhang   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
31801fd43edeSHong Zhang   Mat            M = NULL;
31813b00a383SHong Zhang   MPI_Comm       comm;
3182b20e2604SHong Zhang   IS             iscol_d,isrow_d,iscol_o;
31833b00a383SHong Zhang   Mat            Asub = NULL,Bsub = NULL;
3184b20e2604SHong Zhang   PetscInt       n;
31853b00a383SHong Zhang 
31863b00a383SHong Zhang   PetscFunctionBegin;
31879566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
31883b00a383SHong Zhang 
31893b00a383SHong Zhang   if (call == MAT_REUSE_MATRIX) {
3190b20e2604SHong Zhang     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
31919566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
319228b400f6SJacob Faibussowitsch     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
31933b00a383SHong Zhang 
31949566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
319528b400f6SJacob Faibussowitsch     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
31963b00a383SHong Zhang 
31979566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
319828b400f6SJacob Faibussowitsch     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
31993b00a383SHong Zhang 
3200b20e2604SHong Zhang     /* Update diagonal and off-diagonal portions of submat */
3201b20e2604SHong Zhang     asub = (Mat_MPIAIJ*)(*submat)->data;
32029566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
32039566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o,&n));
32047cfce09cSHong Zhang     if (n) {
32059566063dSJacob Faibussowitsch       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
32067cfce09cSHong Zhang     }
32079566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
32089566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
32093b00a383SHong Zhang 
32103b00a383SHong Zhang   } else { /* call == MAT_INITIAL_MATRIX) */
32119c988bcaSHong Zhang     const PetscInt *garray;
3212b20e2604SHong Zhang     PetscInt        BsubN;
32133b00a383SHong Zhang 
3214b20e2604SHong Zhang     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
32159566063dSJacob Faibussowitsch     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
32163b00a383SHong Zhang 
3217b20e2604SHong Zhang     /* Create local submatrices Asub and Bsub */
32189566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
32199566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
32203b00a383SHong Zhang 
32219c988bcaSHong Zhang     /* Create submatrix M */
32229566063dSJacob Faibussowitsch     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
32233b00a383SHong Zhang 
3224b20e2604SHong Zhang     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3225b20e2604SHong Zhang     asub = (Mat_MPIAIJ*)M->data;
32267cfce09cSHong Zhang 
32279566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3228b20e2604SHong Zhang     n = asub->B->cmap->N;
3229b20e2604SHong Zhang     if (BsubN > n) {
3230c4762a1bSJed Brown       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
32317cfce09cSHong Zhang       const PetscInt *idx;
32329c988bcaSHong Zhang       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
32339566063dSJacob Faibussowitsch       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
32347cfce09cSHong Zhang 
32359566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n,&idx_new));
32367cfce09cSHong Zhang       j = 0;
32379566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_o,&idx));
3238b20e2604SHong Zhang       for (i=0; i<n; i++) {
32397cfce09cSHong Zhang         if (j >= BsubN) break;
32409c988bcaSHong Zhang         while (subgarray[i] > garray[j]) j++;
32417cfce09cSHong Zhang 
32429c988bcaSHong Zhang         if (subgarray[i] == garray[j]) {
32437cfce09cSHong Zhang           idx_new[i] = idx[j++];
324498921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
32457cfce09cSHong Zhang       }
32469566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_o,&idx));
32477cfce09cSHong Zhang 
32489566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_o));
32499566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
32507cfce09cSHong Zhang 
3251b20e2604SHong Zhang     } else if (BsubN < n) {
325298921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3253b20e2604SHong Zhang     }
32547cfce09cSHong Zhang 
32559566063dSJacob Faibussowitsch     PetscCall(PetscFree(garray));
3256b20e2604SHong Zhang     *submat = M;
32573b00a383SHong Zhang 
3258e489de8fSHong Zhang     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
32599566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
32609566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow_d));
32613b00a383SHong Zhang 
32629566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
32639566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_d));
32643b00a383SHong Zhang 
32659566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
32669566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_o));
32673b00a383SHong Zhang   }
32683b00a383SHong Zhang   PetscFunctionReturn(0);
32693b00a383SHong Zhang }
32703b00a383SHong Zhang 
32713782ecc7SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
32723782ecc7SHong Zhang {
32731358a193SHong Zhang   IS             iscol_local=NULL,isrow_d;
32743782ecc7SHong Zhang   PetscInt       csize;
327518e627e3SHong Zhang   PetscInt       n,i,j,start,end;
32764a3daf6eSHong Zhang   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
32773782ecc7SHong Zhang   MPI_Comm       comm;
32783782ecc7SHong Zhang 
32793782ecc7SHong Zhang   PetscFunctionBegin;
3280bcae8d28SHong Zhang   /* If isrow has same processor distribution as mat,
3281a31a438cSHong Zhang      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
32828f69fa7bSHong Zhang   if (call == MAT_REUSE_MATRIX) {
32839566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3284d5761cdaSHong Zhang     if (isrow_d) {
3285d5761cdaSHong Zhang       sameRowDist  = PETSC_TRUE;
3286d5761cdaSHong Zhang       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3287d5761cdaSHong Zhang     } else {
32889566063dSJacob Faibussowitsch       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3289d5761cdaSHong Zhang       if (iscol_local) {
3290d5761cdaSHong Zhang         sameRowDist  = PETSC_TRUE;
3291d5761cdaSHong Zhang         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3292d5761cdaSHong Zhang       }
3293d5761cdaSHong Zhang     }
32948f69fa7bSHong Zhang   } else {
3295e489de8fSHong Zhang     /* Check if isrow has same processor distribution as mat */
329618e627e3SHong Zhang     sameDist[0] = PETSC_FALSE;
32979566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(isrow,&n));
32983782ecc7SHong Zhang     if (!n) {
329918e627e3SHong Zhang       sameDist[0] = PETSC_TRUE;
33003782ecc7SHong Zhang     } else {
33019566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(isrow,&i,&j));
33029566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRange(mat,&start,&end));
330318e627e3SHong Zhang       if (i >= start && j < end) {
330418e627e3SHong Zhang         sameDist[0] = PETSC_TRUE;
33053782ecc7SHong Zhang       }
33068f69fa7bSHong Zhang     }
33073782ecc7SHong Zhang 
3308e489de8fSHong Zhang     /* Check if iscol has same processor distribution as mat */
330918e627e3SHong Zhang     sameDist[1] = PETSC_FALSE;
33109566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol,&n));
331118e627e3SHong Zhang     if (!n) {
331218e627e3SHong Zhang       sameDist[1] = PETSC_TRUE;
331318e627e3SHong Zhang     } else {
33149566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(iscol,&i,&j));
33159566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
331618e627e3SHong Zhang       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
331718e627e3SHong Zhang     }
331818e627e3SHong Zhang 
33199566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
33201c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
332118e627e3SHong Zhang     sameRowDist = tsameDist[0];
332218e627e3SHong Zhang   }
332318e627e3SHong Zhang 
332418e627e3SHong Zhang   if (sameRowDist) {
3325b20e2604SHong Zhang     if (tsameDist[1]) { /* sameRowDist & sameColDist */
33263b00a383SHong Zhang       /* isrow and iscol have same processor distribution as mat */
33279566063dSJacob Faibussowitsch       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
33281358a193SHong Zhang       PetscFunctionReturn(0);
3329b20e2604SHong Zhang     } else { /* sameRowDist */
33303b00a383SHong Zhang       /* isrow has same processor distribution as mat */
33311358a193SHong Zhang       if (call == MAT_INITIAL_MATRIX) {
33321358a193SHong Zhang         PetscBool sorted;
33339566063dSJacob Faibussowitsch         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
33349566063dSJacob Faibussowitsch         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
33359566063dSJacob Faibussowitsch         PetscCall(ISGetSize(iscol,&i));
333608401ef6SPierre Jolivet         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
33371358a193SHong Zhang 
33389566063dSJacob Faibussowitsch         PetscCall(ISSorted(iscol_local,&sorted));
33391358a193SHong Zhang         if (sorted) {
33401358a193SHong Zhang           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
33419566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
33423782ecc7SHong Zhang           PetscFunctionReturn(0);
33433782ecc7SHong Zhang         }
33441358a193SHong Zhang       } else { /* call == MAT_REUSE_MATRIX */
334548c0d076SHong Zhang         IS iscol_sub;
33469566063dSJacob Faibussowitsch         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
334748c0d076SHong Zhang         if (iscol_sub) {
33489566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
334948c0d076SHong Zhang           PetscFunctionReturn(0);
335048c0d076SHong Zhang         }
33511358a193SHong Zhang       }
33521358a193SHong Zhang     }
33531358a193SHong Zhang   }
33543782ecc7SHong Zhang 
3355bcae8d28SHong Zhang   /* General case: iscol -> iscol_local which has global size of iscol */
33563782ecc7SHong Zhang   if (call == MAT_REUSE_MATRIX) {
33579566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
335828b400f6SJacob Faibussowitsch     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
33593782ecc7SHong Zhang   } else {
33601358a193SHong Zhang     if (!iscol_local) {
33619566063dSJacob Faibussowitsch       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
33623782ecc7SHong Zhang     }
33631358a193SHong Zhang   }
33643782ecc7SHong Zhang 
33659566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol,&csize));
33669566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
33678f69fa7bSHong Zhang 
3368b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
33699566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
33709566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
3371b79d0421SJed Brown   }
33724aa3045dSJed Brown   PetscFunctionReturn(0);
33734aa3045dSJed Brown }
33744aa3045dSJed Brown 
3375feb78a15SHong Zhang /*@C
3376feb78a15SHong Zhang      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3377feb78a15SHong Zhang          and "off-diagonal" part of the matrix in CSR format.
3378feb78a15SHong Zhang 
3379d083f849SBarry Smith    Collective
3380feb78a15SHong Zhang 
3381feb78a15SHong Zhang    Input Parameters:
3382feb78a15SHong Zhang +  comm - MPI communicator
3383feb78a15SHong Zhang .  A - "diagonal" portion of matrix
3384b20e2604SHong Zhang .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3385feb78a15SHong Zhang -  garray - global index of B columns
3386feb78a15SHong Zhang 
3387feb78a15SHong Zhang    Output Parameter:
3388d5761cdaSHong Zhang .   mat - the matrix, with input A as its local diagonal matrix
3389feb78a15SHong Zhang    Level: advanced
3390feb78a15SHong Zhang 
3391feb78a15SHong Zhang    Notes:
3392d5761cdaSHong Zhang        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3393d5761cdaSHong Zhang        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3394feb78a15SHong Zhang 
3395feb78a15SHong Zhang .seealso: MatCreateMPIAIJWithSplitArrays()
3396feb78a15SHong Zhang @*/
3397feb78a15SHong Zhang PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3398feb78a15SHong Zhang {
3399feb78a15SHong Zhang   Mat_MPIAIJ        *maij;
3400e489de8fSHong Zhang   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3401a5348796SHong Zhang   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3402ce496241SStefano Zampini   const PetscScalar *oa;
3403e489de8fSHong Zhang   Mat               Bnew;
3404feb78a15SHong Zhang   PetscInt          m,n,N;
3405feb78a15SHong Zhang 
3406feb78a15SHong Zhang   PetscFunctionBegin;
34079566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,mat));
34089566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A,&m,&n));
340908401ef6SPierre Jolivet   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
341008401ef6SPierre Jolivet   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3411b6d9b4e0SHong Zhang   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
341208401ef6SPierre Jolivet   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3413feb78a15SHong Zhang 
3414e489de8fSHong Zhang   /* Get global columns of mat */
34151c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3416feb78a15SHong Zhang 
34179566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
34189566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat,MATMPIAIJ));
34199566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3420feb78a15SHong Zhang   maij = (Mat_MPIAIJ*)(*mat)->data;
3421feb78a15SHong Zhang 
3422feb78a15SHong Zhang   (*mat)->preallocated = PETSC_TRUE;
3423feb78a15SHong Zhang 
34249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
34259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3426feb78a15SHong Zhang 
3427e489de8fSHong Zhang   /* Set A as diagonal portion of *mat */
3428feb78a15SHong Zhang   maij->A = A;
3429feb78a15SHong Zhang 
3430a5348796SHong Zhang   nz = oi[m];
3431a5348796SHong Zhang   for (i=0; i<nz; i++) {
3432a5348796SHong Zhang     col   = oj[i];
3433a5348796SHong Zhang     oj[i] = garray[col];
3434feb78a15SHong Zhang   }
3435feb78a15SHong Zhang 
3436e489de8fSHong Zhang   /* Set Bnew as off-diagonal portion of *mat */
34379566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
34389566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
34399566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3440e489de8fSHong Zhang   bnew        = (Mat_SeqAIJ*)Bnew->data;
3441e489de8fSHong Zhang   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3442e489de8fSHong Zhang   maij->B     = Bnew;
3443d5761cdaSHong Zhang 
344408401ef6SPierre Jolivet   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3445d5761cdaSHong Zhang 
3446e489de8fSHong Zhang   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3447d5761cdaSHong Zhang   b->free_a       = PETSC_FALSE;
3448d5761cdaSHong Zhang   b->free_ij      = PETSC_FALSE;
34499566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
3450d5761cdaSHong Zhang 
3451e489de8fSHong Zhang   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3452e489de8fSHong Zhang   bnew->free_a       = PETSC_TRUE;
3453e489de8fSHong Zhang   bnew->free_ij      = PETSC_TRUE;
3454feb78a15SHong Zhang 
3455a5348796SHong Zhang   /* condense columns of maij->B */
34569566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
34579566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
34589566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
34599566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
34609566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3461feb78a15SHong Zhang   PetscFunctionReturn(0);
3462feb78a15SHong Zhang }
3463feb78a15SHong Zhang 
3464ef514586SHong Zhang extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
34654aa3045dSJed Brown 
34661358a193SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3467a0ff6018SBarry Smith {
346898b658c4SHong Zhang   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
346985f27616SHong Zhang   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
347098b658c4SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
34711fd43edeSHong Zhang   Mat            M,Msub,B=a->B;
347298b658c4SHong Zhang   MatScalar      *aa;
347300e6dbe6SBarry Smith   Mat_SeqAIJ     *aij;
3474a31a438cSHong Zhang   PetscInt       *garray = a->garray,*colsub,Ncols;
347598b658c4SHong Zhang   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
347698b658c4SHong Zhang   IS             iscol_sub,iscmap;
347798b658c4SHong Zhang   const PetscInt *is_idx,*cmap;
347818e627e3SHong Zhang   PetscBool      allcolumns=PETSC_FALSE;
3479a31a438cSHong Zhang   MPI_Comm       comm;
34807e2c5f70SBarry Smith 
3481a0ff6018SBarry Smith   PetscFunctionBegin;
34829566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3483d5761cdaSHong Zhang   if (call == MAT_REUSE_MATRIX) {
34849566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
348528b400f6SJacob Faibussowitsch     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
34869566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_sub,&count));
3487d5761cdaSHong Zhang 
34889566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
348928b400f6SJacob Faibussowitsch     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3490d5761cdaSHong Zhang 
34919566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
349228b400f6SJacob Faibussowitsch     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3493d5761cdaSHong Zhang 
34949566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3495d5761cdaSHong Zhang 
3496d5761cdaSHong Zhang   } else { /* call == MAT_INITIAL_MATRIX) */
34973b00a383SHong Zhang     PetscBool flg;
34983b00a383SHong Zhang 
34999566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol,&n));
35009566063dSJacob Faibussowitsch     PetscCall(ISGetSize(iscol,&Ncols));
3501bcae8d28SHong Zhang 
35023b00a383SHong Zhang     /* (1) iscol -> nonscalable iscol_local */
3503366a327dSHong Zhang     /* Check for special case: each processor gets entire matrix columns */
35049566063dSJacob Faibussowitsch     PetscCall(ISIdentity(iscol_local,&flg));
3505366a327dSHong Zhang     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
35061c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3507366a327dSHong Zhang     if (allcolumns) {
3508366a327dSHong Zhang       iscol_sub = iscol_local;
35099566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)iscol_local));
35109566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3511366a327dSHong Zhang 
35123b00a383SHong Zhang     } else {
35131358a193SHong Zhang       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3514244c7f15SHong Zhang       PetscInt *idx,*cmap1,k;
35159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols,&idx));
35169566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols,&cmap1));
35179566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_local,&is_idx));
35188d2139bdSHong Zhang       count = 0;
3519a31a438cSHong Zhang       k     = 0;
3520a31a438cSHong Zhang       for (i=0; i<Ncols; i++) {
3521a31a438cSHong Zhang         j = is_idx[i];
3522a31a438cSHong Zhang         if (j >= cstart && j < cend) {
3523a31a438cSHong Zhang           /* diagonal part of mat */
35248d2139bdSHong Zhang           idx[count]     = j;
3525366a327dSHong Zhang           cmap1[count++] = i; /* column index in submat */
35264a3daf6eSHong Zhang         } else if (Bn) {
3527a31a438cSHong Zhang           /* off-diagonal part of mat */
3528a31a438cSHong Zhang           if (j == garray[k]) {
35298d2139bdSHong Zhang             idx[count]     = j;
3530a31a438cSHong Zhang             cmap1[count++] = i;  /* column index in submat */
3531a31a438cSHong Zhang           } else if (j > garray[k]) {
3532a31a438cSHong Zhang             while (j > garray[k] && k < Bn-1) k++;
3533a31a438cSHong Zhang             if (j == garray[k]) {
3534a31a438cSHong Zhang               idx[count]     = j;
3535a31a438cSHong Zhang               cmap1[count++] = i; /* column index in submat */
35368d2139bdSHong Zhang             }
35378d2139bdSHong Zhang           }
35388d2139bdSHong Zhang         }
35398d2139bdSHong Zhang       }
35409566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
35418d2139bdSHong Zhang 
35429566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
35439566063dSJacob Faibussowitsch       PetscCall(ISGetBlockSize(iscol,&cbs));
35449566063dSJacob Faibussowitsch       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3545b6d9b4e0SHong Zhang 
35469566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3547a31a438cSHong Zhang     }
35488b3fa1f7SHong Zhang 
35493b00a383SHong Zhang     /* (3) Create sequential Msub */
35509566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3551d5761cdaSHong Zhang   }
35528d2139bdSHong Zhang 
35539566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_sub,&count));
355498b658c4SHong Zhang   aij  = (Mat_SeqAIJ*)(Msub)->data;
355598b658c4SHong Zhang   ii   = aij->i;
35569566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscmap,&cmap));
3557a0ff6018SBarry Smith 
3558a0ff6018SBarry Smith   /*
3559a0ff6018SBarry Smith       m - number of local rows
3560a31a438cSHong Zhang       Ncols - number of columns (same on all processors)
3561a0ff6018SBarry Smith       rstart - first row in new global matrix generated
3562a0ff6018SBarry Smith   */
35639566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Msub,&m,NULL));
356498b658c4SHong Zhang 
35653b00a383SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
35663b00a383SHong Zhang     /* (4) Create parallel newmat */
356798b658c4SHong Zhang     PetscMPIInt    rank,size;
3568bcae8d28SHong Zhang     PetscInt       csize;
356998b658c4SHong Zhang 
35709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm,&size));
35719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm,&rank));
357200e6dbe6SBarry Smith 
3573a0ff6018SBarry Smith     /*
357400e6dbe6SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
357500e6dbe6SBarry Smith         portions of the matrix in order to do correct preallocation
3576a0ff6018SBarry Smith     */
357700e6dbe6SBarry Smith 
357800e6dbe6SBarry Smith     /* first get start and end of "diagonal" columns */
35799566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol,&csize));
35806a6a5d1dSBarry Smith     if (csize == PETSC_DECIDE) {
35819566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow,&mglobal));
3582a31a438cSHong Zhang       if (mglobal == Ncols) { /* square matrix */
3583e2c4fddaSBarry Smith         nlocal = m;
35846a6a5d1dSBarry Smith       } else {
3585a31a438cSHong Zhang         nlocal = Ncols/size + ((Ncols % size) > rank);
3586ab50ec6bSBarry Smith       }
3587ab50ec6bSBarry Smith     } else {
35886a6a5d1dSBarry Smith       nlocal = csize;
35896a6a5d1dSBarry Smith     }
35909566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
359100e6dbe6SBarry Smith     rstart = rend - nlocal;
3592*aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
359300e6dbe6SBarry Smith 
359400e6dbe6SBarry Smith     /* next, compute all the lengths */
359598b658c4SHong Zhang     jj    = aij->j;
35969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2*m+1,&dlens));
359700e6dbe6SBarry Smith     olens = dlens + m;
359800e6dbe6SBarry Smith     for (i=0; i<m; i++) {
359900e6dbe6SBarry Smith       jend = ii[i+1] - ii[i];
360000e6dbe6SBarry Smith       olen = 0;
360100e6dbe6SBarry Smith       dlen = 0;
360200e6dbe6SBarry Smith       for (j=0; j<jend; j++) {
360315b2185cSHong Zhang         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
360400e6dbe6SBarry Smith         else dlen++;
360500e6dbe6SBarry Smith         jj++;
360600e6dbe6SBarry Smith       }
360700e6dbe6SBarry Smith       olens[i] = olen;
360800e6dbe6SBarry Smith       dlens[i] = dlen;
360900e6dbe6SBarry Smith     }
3610b6d9b4e0SHong Zhang 
36119566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(isrow,&bs));
36129566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol,&cbs));
361398b658c4SHong Zhang 
36149566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm,&M));
36159566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
36169566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M,bs,cbs));
36179566063dSJacob Faibussowitsch     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
36189566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
36199566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3620d5761cdaSHong Zhang 
3621d5761cdaSHong Zhang   } else { /* call == MAT_REUSE_MATRIX */
3622a0ff6018SBarry Smith     M    = *newmat;
36239566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M,&i,NULL));
362408401ef6SPierre Jolivet     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
36259566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3626c48de900SBarry Smith     /*
3627c48de900SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3628c48de900SBarry Smith        rather than the slower MatSetValues().
3629c48de900SBarry Smith     */
3630c48de900SBarry Smith     M->was_assembled = PETSC_TRUE;
3631c48de900SBarry Smith     M->assembled     = PETSC_FALSE;
3632a0ff6018SBarry Smith   }
3633548ecf4dSHong Zhang 
36343b00a383SHong Zhang   /* (5) Set values of Msub to *newmat */
36359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(count,&colsub));
36369566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
363798b658c4SHong Zhang 
363898b658c4SHong Zhang   jj   = aij->j;
36399566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3640a0ff6018SBarry Smith   for (i=0; i<m; i++) {
3641a0ff6018SBarry Smith     row = rstart + i;
364200e6dbe6SBarry Smith     nz  = ii[i+1] - ii[i];
364315b2185cSHong Zhang     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
36449566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
364515b2185cSHong Zhang     jj += nz; aa += nz;
3646a0ff6018SBarry Smith   }
36479566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
36489566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscmap,&cmap));
3649a0ff6018SBarry Smith 
36509566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
36519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3652fee21e36SBarry Smith 
36539566063dSJacob Faibussowitsch   PetscCall(PetscFree(colsub));
365498b658c4SHong Zhang 
365598b658c4SHong Zhang   /* save Msub, iscol_sub and iscmap used in processor for next request */
3656fee21e36SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
36573b00a383SHong Zhang     *newmat = M;
36589566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
36599566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Msub));
366098b658c4SHong Zhang 
36619566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
36629566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_sub));
366398b658c4SHong Zhang 
36649566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
36659566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscmap));
3666bcae8d28SHong Zhang 
3667bcae8d28SHong Zhang     if (iscol_local) {
36689566063dSJacob Faibussowitsch       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
36699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_local));
3670bcae8d28SHong Zhang     }
367198b658c4SHong Zhang   }
3672a0ff6018SBarry Smith   PetscFunctionReturn(0);
3673a0ff6018SBarry Smith }
3674273d9f13SBarry Smith 
3675df40acb1SHong Zhang /*
3676df40acb1SHong Zhang     Not great since it makes two copies of the submatrix, first an SeqAIJ
3677df40acb1SHong Zhang   in local and then by concatenating the local matrices the end result.
3678df40acb1SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3679df40acb1SHong Zhang 
3680df40acb1SHong Zhang   Note: This requires a sequential iscol with all indices.
3681df40acb1SHong Zhang */
3682618cbb4aSHong Zhang PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3683df40acb1SHong Zhang {
3684df40acb1SHong Zhang   PetscMPIInt    rank,size;
3685df40acb1SHong Zhang   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3686df40acb1SHong Zhang   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3687df40acb1SHong Zhang   Mat            M,Mreuse;
368898b658c4SHong Zhang   MatScalar      *aa,*vwork;
3689df40acb1SHong Zhang   MPI_Comm       comm;
3690df40acb1SHong Zhang   Mat_SeqAIJ     *aij;
36910b27a90eSHong Zhang   PetscBool      colflag,allcolumns=PETSC_FALSE;
3692df40acb1SHong Zhang 
3693df40acb1SHong Zhang   PetscFunctionBegin;
36949566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
36959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm,&rank));
36969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
3697df40acb1SHong Zhang 
36980b27a90eSHong Zhang   /* Check for special case: each processor gets entire matrix columns */
36999566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol,&colflag));
37009566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol,&n));
37010b27a90eSHong Zhang   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
37021c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
37030b27a90eSHong Zhang 
3704df40acb1SHong Zhang   if (call ==  MAT_REUSE_MATRIX) {
37059566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
370628b400f6SJacob Faibussowitsch     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
37079566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3708df40acb1SHong Zhang   } else {
37099566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3710df40acb1SHong Zhang   }
3711df40acb1SHong Zhang 
3712df40acb1SHong Zhang   /*
3713df40acb1SHong Zhang       m - number of local rows
3714df40acb1SHong Zhang       n - number of columns (same on all processors)
3715df40acb1SHong Zhang       rstart - first row in new global matrix generated
3716df40acb1SHong Zhang   */
37179566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Mreuse,&m,&n));
37189566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3719df40acb1SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
3720df40acb1SHong Zhang     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3721df40acb1SHong Zhang     ii  = aij->i;
3722df40acb1SHong Zhang     jj  = aij->j;
3723df40acb1SHong Zhang 
3724df40acb1SHong Zhang     /*
3725df40acb1SHong Zhang         Determine the number of non-zeros in the diagonal and off-diagonal
3726df40acb1SHong Zhang         portions of the matrix in order to do correct preallocation
3727df40acb1SHong Zhang     */
3728df40acb1SHong Zhang 
3729df40acb1SHong Zhang     /* first get start and end of "diagonal" columns */
3730df40acb1SHong Zhang     if (csize == PETSC_DECIDE) {
37319566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow,&mglobal));
3732df40acb1SHong Zhang       if (mglobal == n) { /* square matrix */
3733df40acb1SHong Zhang         nlocal = m;
3734df40acb1SHong Zhang       } else {
3735df40acb1SHong Zhang         nlocal = n/size + ((n % size) > rank);
3736df40acb1SHong Zhang       }
3737df40acb1SHong Zhang     } else {
3738df40acb1SHong Zhang       nlocal = csize;
3739df40acb1SHong Zhang     }
37409566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3741df40acb1SHong Zhang     rstart = rend - nlocal;
3742*aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3743df40acb1SHong Zhang 
3744df40acb1SHong Zhang     /* next, compute all the lengths */
37459566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2*m+1,&dlens));
3746df40acb1SHong Zhang     olens = dlens + m;
3747df40acb1SHong Zhang     for (i=0; i<m; i++) {
3748df40acb1SHong Zhang       jend = ii[i+1] - ii[i];
3749df40acb1SHong Zhang       olen = 0;
3750df40acb1SHong Zhang       dlen = 0;
3751df40acb1SHong Zhang       for (j=0; j<jend; j++) {
3752df40acb1SHong Zhang         if (*jj < rstart || *jj >= rend) olen++;
3753df40acb1SHong Zhang         else dlen++;
3754df40acb1SHong Zhang         jj++;
3755df40acb1SHong Zhang       }
3756df40acb1SHong Zhang       olens[i] = olen;
3757df40acb1SHong Zhang       dlens[i] = dlen;
3758df40acb1SHong Zhang     }
37599566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm,&M));
37609566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
37619566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M,bs,cbs));
37629566063dSJacob Faibussowitsch     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
37639566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
37649566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3765df40acb1SHong Zhang   } else {
3766df40acb1SHong Zhang     PetscInt ml,nl;
3767df40acb1SHong Zhang 
3768df40acb1SHong Zhang     M    = *newmat;
37699566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M,&ml,&nl));
377008401ef6SPierre Jolivet     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
37719566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3772df40acb1SHong Zhang     /*
3773df40acb1SHong Zhang          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3774df40acb1SHong Zhang        rather than the slower MatSetValues().
3775df40acb1SHong Zhang     */
3776df40acb1SHong Zhang     M->was_assembled = PETSC_TRUE;
3777df40acb1SHong Zhang     M->assembled     = PETSC_FALSE;
3778df40acb1SHong Zhang   }
37799566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3780df40acb1SHong Zhang   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3781df40acb1SHong Zhang   ii   = aij->i;
3782df40acb1SHong Zhang   jj   = aij->j;
37832e5835c6SStefano Zampini 
37842e5835c6SStefano Zampini   /* trigger copy to CPU if needed */
37859566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3786df40acb1SHong Zhang   for (i=0; i<m; i++) {
3787df40acb1SHong Zhang     row   = rstart + i;
3788df40acb1SHong Zhang     nz    = ii[i+1] - ii[i];
3789df40acb1SHong Zhang     cwork = jj; jj += nz;
3790df40acb1SHong Zhang     vwork = aa; aa += nz;
37919566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3792df40acb1SHong Zhang   }
37939566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3794df40acb1SHong Zhang 
37959566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
37969566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3797df40acb1SHong Zhang   *newmat = M;
3798df40acb1SHong Zhang 
3799df40acb1SHong Zhang   /* save submatrix used in processor for next request */
3800df40acb1SHong Zhang   if (call ==  MAT_INITIAL_MATRIX) {
38019566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
38029566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Mreuse));
3803df40acb1SHong Zhang   }
3804df40acb1SHong Zhang   PetscFunctionReturn(0);
3805df40acb1SHong Zhang }
3806df40acb1SHong Zhang 
38077087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3808ccd8e176SBarry Smith {
3809899cda47SBarry Smith   PetscInt       m,cstart, cend,j,nnz,i,d;
3810899cda47SBarry Smith   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3811ccd8e176SBarry Smith   const PetscInt *JJ;
3812eeb24464SBarry Smith   PetscBool      nooffprocentries;
3813ccd8e176SBarry Smith 
3814ccd8e176SBarry Smith   PetscFunctionBegin;
3815*aed4548fSBarry Smith   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3816899cda47SBarry Smith 
38179566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
38189566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
3819d0f46423SBarry Smith   m      = B->rmap->n;
3820d0f46423SBarry Smith   cstart = B->cmap->rstart;
3821d0f46423SBarry Smith   cend   = B->cmap->rend;
3822d0f46423SBarry Smith   rstart = B->rmap->rstart;
3823899cda47SBarry Smith 
38249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3825ccd8e176SBarry Smith 
382676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
38278f8f2f0dSBarry Smith     for (i=0; i<m; i++) {
3828ecc77c7aSBarry Smith       nnz = Ii[i+1]- Ii[i];
3829ecc77c7aSBarry Smith       JJ  = J + Ii[i];
383008401ef6SPierre Jolivet       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
383108401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
383208401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3833ecc77c7aSBarry Smith     }
383476bd3646SJed Brown   }
3835ecc77c7aSBarry Smith 
38368f8f2f0dSBarry Smith   for (i=0; i<m; i++) {
3837b7940d39SSatish Balay     nnz     = Ii[i+1]- Ii[i];
3838b7940d39SSatish Balay     JJ      = J + Ii[i];
3839ccd8e176SBarry Smith     nnz_max = PetscMax(nnz_max,nnz);
3840ccd8e176SBarry Smith     d       = 0;
38410daa03b5SJed Brown     for (j=0; j<nnz; j++) {
38420daa03b5SJed Brown       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3843ccd8e176SBarry Smith     }
3844ccd8e176SBarry Smith     d_nnz[i] = d;
3845ccd8e176SBarry Smith     o_nnz[i] = nnz - d;
3846ccd8e176SBarry Smith   }
38479566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
38489566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz,o_nnz));
3849ccd8e176SBarry Smith 
38508f8f2f0dSBarry Smith   for (i=0; i<m; i++) {
3851ccd8e176SBarry Smith     ii   = i + rstart;
38529566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3853ccd8e176SBarry Smith   }
3854eeb24464SBarry Smith   nooffprocentries    = B->nooffprocentries;
3855eeb24464SBarry Smith   B->nooffprocentries = PETSC_TRUE;
38569566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
38579566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3858eeb24464SBarry Smith   B->nooffprocentries = nooffprocentries;
3859ccd8e176SBarry Smith 
38609566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3861ccd8e176SBarry Smith   PetscFunctionReturn(0);
3862ccd8e176SBarry Smith }
3863ccd8e176SBarry Smith 
38641eea217eSSatish Balay /*@
3865ccd8e176SBarry Smith    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3866ccd8e176SBarry Smith    (the default parallel PETSc format).
3867ccd8e176SBarry Smith 
3868d083f849SBarry Smith    Collective
3869ccd8e176SBarry Smith 
3870ccd8e176SBarry Smith    Input Parameters:
3871a1661176SMatthew Knepley +  B - the matrix
3872ccd8e176SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
38730daa03b5SJed Brown .  j - the column indices for each local row (starts with zero)
3874ccd8e176SBarry Smith -  v - optional values in the matrix
3875ccd8e176SBarry Smith 
3876ccd8e176SBarry Smith    Level: developer
3877ccd8e176SBarry Smith 
387812251496SSatish Balay    Notes:
3879c1c1d628SHong Zhang        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3880c1c1d628SHong Zhang      thus you CANNOT change the matrix entries by changing the values of v[] after you have
388112251496SSatish Balay      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
388212251496SSatish Balay 
388312251496SSatish Balay        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
388412251496SSatish Balay 
388512251496SSatish Balay        The format which is used for the sparse matrix input, is equivalent to a
388612251496SSatish Balay     row-major ordering.. i.e for the following matrix, the input data expected is
3887c5e4d11fSDmitry Karpeev     as shown
388812251496SSatish Balay 
3889c5e4d11fSDmitry Karpeev $        1 0 0
3890c5e4d11fSDmitry Karpeev $        2 0 3     P0
3891c5e4d11fSDmitry Karpeev $       -------
3892c5e4d11fSDmitry Karpeev $        4 5 6     P1
3893c5e4d11fSDmitry Karpeev $
3894c5e4d11fSDmitry Karpeev $     Process0 [P0]: rows_owned=[0,1]
3895c5e4d11fSDmitry Karpeev $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3896c5e4d11fSDmitry Karpeev $        j =  {0,0,2}  [size = 3]
3897c5e4d11fSDmitry Karpeev $        v =  {1,2,3}  [size = 3]
3898c5e4d11fSDmitry Karpeev $
3899c5e4d11fSDmitry Karpeev $     Process1 [P1]: rows_owned=[2]
3900c5e4d11fSDmitry Karpeev $        i =  {0,3}    [size = nrow+1  = 1+1]
3901c5e4d11fSDmitry Karpeev $        j =  {0,1,2}  [size = 3]
3902c5e4d11fSDmitry Karpeev $        v =  {4,5,6}  [size = 3]
390312251496SSatish Balay 
39045f4d30c4SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
39058d7a6e47SBarry Smith           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3906ccd8e176SBarry Smith @*/
39077087cfbeSBarry Smith PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3908ccd8e176SBarry Smith {
3909ccd8e176SBarry Smith   PetscFunctionBegin;
3910cac4c232SBarry Smith   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3911ccd8e176SBarry Smith   PetscFunctionReturn(0);
3912ccd8e176SBarry Smith }
3913ccd8e176SBarry Smith 
3914273d9f13SBarry Smith /*@C
3915ccd8e176SBarry Smith    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3916273d9f13SBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
3917273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
3918273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3919273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
3920273d9f13SBarry Smith 
3921d083f849SBarry Smith    Collective
3922273d9f13SBarry Smith 
3923273d9f13SBarry Smith    Input Parameters:
39241c4f3114SJed Brown +  B - the matrix
3925273d9f13SBarry Smith .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3926273d9f13SBarry Smith            (same value is used for all local rows)
3927273d9f13SBarry Smith .  d_nnz - array containing the number of nonzeros in the various rows of the
3928273d9f13SBarry Smith            DIAGONAL portion of the local submatrix (possibly different for each row)
392920fa73abSMatthew G. Knepley            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3930273d9f13SBarry Smith            The size of this array is equal to the number of local rows, i.e 'm'.
39313287b5eaSJed Brown            For matrices that will be factored, you must leave room for (and set)
39323287b5eaSJed Brown            the diagonal entry even if it is zero.
3933273d9f13SBarry Smith .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3934273d9f13SBarry Smith            submatrix (same value is used for all local rows).
3935273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various rows of the
3936273d9f13SBarry Smith            OFF-DIAGONAL portion of the local submatrix (possibly different for
393720fa73abSMatthew G. Knepley            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3938273d9f13SBarry Smith            structure. The size of this array is equal to the number
3939273d9f13SBarry Smith            of local rows, i.e 'm'.
3940273d9f13SBarry Smith 
394149a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
394249a6f317SBarry Smith 
3943273d9f13SBarry Smith    The AIJ format (also called the Yale sparse matrix format or
3944ccd8e176SBarry Smith    compressed row storage (CSR)), is fully compatible with standard Fortran 77
39450598bfebSBarry Smith    storage.  The stored row and column indices begin with zero.
3946a7f22e61SSatish Balay    See Users-Manual: ch_mat for details.
3947273d9f13SBarry Smith 
3948273d9f13SBarry Smith    The parallel matrix is partitioned such that the first m0 rows belong to
3949273d9f13SBarry Smith    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3950273d9f13SBarry Smith    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3951273d9f13SBarry Smith 
3952273d9f13SBarry Smith    The DIAGONAL portion of the local submatrix of a processor can be defined
3953a05b864aSJed Brown    as the submatrix which is obtained by extracting the part corresponding to
3954a05b864aSJed Brown    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3955a05b864aSJed Brown    first row that belongs to the processor, r2 is the last row belonging to
3956a05b864aSJed Brown    this processor, and c1-c2 is the range of indices of the local part of a
3957a05b864aSJed Brown    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3958a05b864aSJed Brown    common case of a square matrix, the row and column ranges are the same and
3959a05b864aSJed Brown    the DIAGONAL part is also square. The remaining portion of the local
3960a05b864aSJed Brown    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3961273d9f13SBarry Smith 
3962273d9f13SBarry Smith    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3963273d9f13SBarry Smith 
3964aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3965aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3966aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3967aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3968aa95bbe8SBarry Smith 
3969273d9f13SBarry Smith    Example usage:
3970273d9f13SBarry Smith 
3971273d9f13SBarry Smith    Consider the following 8x8 matrix with 34 non-zero values, that is
3972273d9f13SBarry Smith    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3973273d9f13SBarry Smith    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3974273d9f13SBarry Smith    as follows:
3975273d9f13SBarry Smith 
3976273d9f13SBarry Smith .vb
3977273d9f13SBarry Smith             1  2  0  |  0  3  0  |  0  4
3978273d9f13SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
3979273d9f13SBarry Smith             9  0 10  | 11  0  0  | 12  0
3980273d9f13SBarry Smith     -------------------------------------
3981273d9f13SBarry Smith            13  0 14  | 15 16 17  |  0  0
3982273d9f13SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
3983273d9f13SBarry Smith             0  0  0  | 22 23  0  | 24  0
3984273d9f13SBarry Smith     -------------------------------------
3985273d9f13SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
3986273d9f13SBarry Smith            30  0  0  | 31 32 33  |  0 34
3987273d9f13SBarry Smith .ve
3988273d9f13SBarry Smith 
3989273d9f13SBarry Smith    This can be represented as a collection of submatrices as:
3990273d9f13SBarry Smith 
3991273d9f13SBarry Smith .vb
3992273d9f13SBarry Smith       A B C
3993273d9f13SBarry Smith       D E F
3994273d9f13SBarry Smith       G H I
3995273d9f13SBarry Smith .ve
3996273d9f13SBarry Smith 
3997273d9f13SBarry Smith    Where the submatrices A,B,C are owned by proc0, D,E,F are
3998273d9f13SBarry Smith    owned by proc1, G,H,I are owned by proc2.
3999273d9f13SBarry Smith 
4000273d9f13SBarry Smith    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4001273d9f13SBarry Smith    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4002273d9f13SBarry Smith    The 'M','N' parameters are 8,8, and have the same values on all procs.
4003273d9f13SBarry Smith 
4004273d9f13SBarry Smith    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4005273d9f13SBarry Smith    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4006273d9f13SBarry Smith    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4007273d9f13SBarry Smith    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4008273d9f13SBarry Smith    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4009273d9f13SBarry Smith    matrix, and [DF] as another SeqAIJ matrix.
4010273d9f13SBarry Smith 
4011273d9f13SBarry Smith    When d_nz, o_nz parameters are specified, d_nz storage elements are
4012273d9f13SBarry Smith    allocated for every row of the local diagonal submatrix, and o_nz
4013273d9f13SBarry Smith    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4014273d9f13SBarry Smith    One way to choose d_nz and o_nz is to use the max nonzeros per local
4015273d9f13SBarry Smith    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4016273d9f13SBarry Smith    In this case, the values of d_nz,o_nz are:
4017273d9f13SBarry Smith .vb
4018273d9f13SBarry Smith      proc0 : d_nz = 2, o_nz = 2
4019273d9f13SBarry Smith      proc1 : d_nz = 3, o_nz = 2
4020273d9f13SBarry Smith      proc2 : d_nz = 1, o_nz = 4
4021273d9f13SBarry Smith .ve
4022273d9f13SBarry Smith    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4023273d9f13SBarry Smith    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4024273d9f13SBarry Smith    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4025273d9f13SBarry Smith    34 values.
4026273d9f13SBarry Smith 
4027273d9f13SBarry Smith    When d_nnz, o_nnz parameters are specified, the storage is specified
4028a5b23f4aSJose E. Roman    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4029273d9f13SBarry Smith    In the above case the values for d_nnz,o_nnz are:
4030273d9f13SBarry Smith .vb
4031273d9f13SBarry Smith      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4032273d9f13SBarry Smith      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4033273d9f13SBarry Smith      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4034273d9f13SBarry Smith .ve
4035273d9f13SBarry Smith    Here the space allocated is sum of all the above values i.e 34, and
4036273d9f13SBarry Smith    hence pre-allocation is perfect.
4037273d9f13SBarry Smith 
4038273d9f13SBarry Smith    Level: intermediate
4039273d9f13SBarry Smith 
404069b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
40415f4d30c4SBarry Smith           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4042273d9f13SBarry Smith @*/
40437087cfbeSBarry Smith PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4044273d9f13SBarry Smith {
4045273d9f13SBarry Smith   PetscFunctionBegin;
40466ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
40476ba663aaSJed Brown   PetscValidType(B,1);
4048cac4c232SBarry Smith   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4049273d9f13SBarry Smith   PetscFunctionReturn(0);
4050273d9f13SBarry Smith }
4051273d9f13SBarry Smith 
405258d36128SBarry Smith /*@
40532fb0ec9aSBarry Smith      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
40548f8f2f0dSBarry Smith          CSR format for the local rows.
40552fb0ec9aSBarry Smith 
4056d083f849SBarry Smith    Collective
40572fb0ec9aSBarry Smith 
40582fb0ec9aSBarry Smith    Input Parameters:
40592fb0ec9aSBarry Smith +  comm - MPI communicator
40602fb0ec9aSBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
40612fb0ec9aSBarry Smith .  n - This value should be the same as the local size used in creating the
40622fb0ec9aSBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
40632fb0ec9aSBarry Smith        calculated if N is given) For square matrices n is almost always m.
40642fb0ec9aSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
40652fb0ec9aSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4066483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
40672fb0ec9aSBarry Smith .   j - column indices
40682fb0ec9aSBarry Smith -   a - matrix values
40692fb0ec9aSBarry Smith 
40702fb0ec9aSBarry Smith    Output Parameter:
40712fb0ec9aSBarry Smith .   mat - the matrix
407203bfb495SBarry Smith 
40732fb0ec9aSBarry Smith    Level: intermediate
40742fb0ec9aSBarry Smith 
40752fb0ec9aSBarry Smith    Notes:
40762fb0ec9aSBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
40772fb0ec9aSBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
40788d7a6e47SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
40792fb0ec9aSBarry Smith 
408012251496SSatish Balay        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
408112251496SSatish Balay 
408212251496SSatish Balay        The format which is used for the sparse matrix input, is equivalent to a
408312251496SSatish Balay     row-major ordering.. i.e for the following matrix, the input data expected is
4084c5e4d11fSDmitry Karpeev     as shown
408512251496SSatish Balay 
40868f8f2f0dSBarry Smith        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
40878f8f2f0dSBarry Smith 
4088c5e4d11fSDmitry Karpeev $        1 0 0
4089c5e4d11fSDmitry Karpeev $        2 0 3     P0
4090c5e4d11fSDmitry Karpeev $       -------
4091c5e4d11fSDmitry Karpeev $        4 5 6     P1
4092c5e4d11fSDmitry Karpeev $
4093c5e4d11fSDmitry Karpeev $     Process0 [P0]: rows_owned=[0,1]
4094c5e4d11fSDmitry Karpeev $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4095c5e4d11fSDmitry Karpeev $        j =  {0,0,2}  [size = 3]
4096c5e4d11fSDmitry Karpeev $        v =  {1,2,3}  [size = 3]
4097c5e4d11fSDmitry Karpeev $
4098c5e4d11fSDmitry Karpeev $     Process1 [P1]: rows_owned=[2]
4099c5e4d11fSDmitry Karpeev $        i =  {0,3}    [size = nrow+1  = 1+1]
4100c5e4d11fSDmitry Karpeev $        j =  {0,1,2}  [size = 3]
4101c5e4d11fSDmitry Karpeev $        v =  {4,5,6}  [size = 3]
41022fb0ec9aSBarry Smith 
41032fb0ec9aSBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
41048f8f2f0dSBarry Smith           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
41052fb0ec9aSBarry Smith @*/
41067087cfbeSBarry Smith PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
41072fb0ec9aSBarry Smith {
41082fb0ec9aSBarry Smith   PetscFunctionBegin;
410908401ef6SPierre Jolivet   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
411008401ef6SPierre Jolivet   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
41119566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,mat));
41129566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat,m,n,M,N));
41139566063dSJacob Faibussowitsch   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
41149566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat,MATMPIAIJ));
41159566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
41162fb0ec9aSBarry Smith   PetscFunctionReturn(0);
41172fb0ec9aSBarry Smith }
41182fb0ec9aSBarry Smith 
41198f8f2f0dSBarry Smith /*@
41208f8f2f0dSBarry Smith      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
41218f8f2f0dSBarry Smith          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
41228f8f2f0dSBarry Smith 
41238f8f2f0dSBarry Smith    Collective
41248f8f2f0dSBarry Smith 
41258f8f2f0dSBarry Smith    Input Parameters:
41268f8f2f0dSBarry Smith +  mat - the matrix
41278f8f2f0dSBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
41288f8f2f0dSBarry Smith .  n - This value should be the same as the local size used in creating the
41298f8f2f0dSBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
41308f8f2f0dSBarry Smith        calculated if N is given) For square matrices n is almost always m.
41318f8f2f0dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
41328f8f2f0dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
41338f8f2f0dSBarry Smith .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
41348f8f2f0dSBarry Smith .  J - column indices
41358f8f2f0dSBarry Smith -  v - matrix values
41368f8f2f0dSBarry Smith 
41378f8f2f0dSBarry Smith    Level: intermediate
41388f8f2f0dSBarry Smith 
41398f8f2f0dSBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
41408f8f2f0dSBarry Smith           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
41418f8f2f0dSBarry Smith @*/
41428f8f2f0dSBarry Smith PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
41438f8f2f0dSBarry Smith {
414470990e77SSatish Balay   PetscInt       cstart,nnz,i,j;
41458f8f2f0dSBarry Smith   PetscInt       *ld;
41468f8f2f0dSBarry Smith   PetscBool      nooffprocentries;
41478f8f2f0dSBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4148fff043a9SJunchao Zhang   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4149fff043a9SJunchao Zhang   PetscScalar    *ad,*ao;
41508f8f2f0dSBarry Smith   const PetscInt *Adi = Ad->i;
41518f8f2f0dSBarry Smith   PetscInt       ldi,Iii,md;
41528f8f2f0dSBarry Smith 
41538f8f2f0dSBarry Smith   PetscFunctionBegin;
4154*aed4548fSBarry Smith   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
415508401ef6SPierre Jolivet   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
415608401ef6SPierre Jolivet   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
415708401ef6SPierre Jolivet   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
41588f8f2f0dSBarry Smith 
41599566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
41609566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
41618f8f2f0dSBarry Smith   cstart = mat->cmap->rstart;
41628f8f2f0dSBarry Smith   if (!Aij->ld) {
41638f8f2f0dSBarry Smith     /* count number of entries below block diagonal */
41649566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(m,&ld));
41658f8f2f0dSBarry Smith     Aij->ld = ld;
41668f8f2f0dSBarry Smith     for (i=0; i<m; i++) {
41678f8f2f0dSBarry Smith       nnz  = Ii[i+1]- Ii[i];
41688f8f2f0dSBarry Smith       j     = 0;
41698f8f2f0dSBarry Smith       while  (J[j] < cstart && j < nnz) {j++;}
41708f8f2f0dSBarry Smith       J    += nnz;
41718f8f2f0dSBarry Smith       ld[i] = j;
41728f8f2f0dSBarry Smith     }
41738f8f2f0dSBarry Smith   } else {
41748f8f2f0dSBarry Smith     ld = Aij->ld;
41758f8f2f0dSBarry Smith   }
41768f8f2f0dSBarry Smith 
41778f8f2f0dSBarry Smith   for (i=0; i<m; i++) {
41788f8f2f0dSBarry Smith     nnz  = Ii[i+1]- Ii[i];
41798f8f2f0dSBarry Smith     Iii  = Ii[i];
41808f8f2f0dSBarry Smith     ldi  = ld[i];
41818f8f2f0dSBarry Smith     md   = Adi[i+1]-Adi[i];
41829566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
41839566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
41849566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
41858f8f2f0dSBarry Smith     ad  += md;
41868f8f2f0dSBarry Smith     ao  += nnz - md;
41878f8f2f0dSBarry Smith   }
41888f8f2f0dSBarry Smith   nooffprocentries      = mat->nooffprocentries;
41898f8f2f0dSBarry Smith   mat->nooffprocentries = PETSC_TRUE;
41909566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
41919566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
41929566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
41939566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
41949566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
41959566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
41969566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
41978f8f2f0dSBarry Smith   mat->nooffprocentries = nooffprocentries;
41988f8f2f0dSBarry Smith   PetscFunctionReturn(0);
41998f8f2f0dSBarry Smith }
42008f8f2f0dSBarry Smith 
4201273d9f13SBarry Smith /*@C
420269b1f4b7SBarry Smith    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4203273d9f13SBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
4204273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
4205273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4206273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
4207273d9f13SBarry Smith 
4208d083f849SBarry Smith    Collective
4209273d9f13SBarry Smith 
4210273d9f13SBarry Smith    Input Parameters:
4211273d9f13SBarry Smith +  comm - MPI communicator
4212273d9f13SBarry Smith .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4213273d9f13SBarry Smith            This value should be the same as the local size used in creating the
4214273d9f13SBarry Smith            y vector for the matrix-vector product y = Ax.
4215273d9f13SBarry Smith .  n - This value should be the same as the local size used in creating the
4216273d9f13SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4217273d9f13SBarry Smith        calculated if N is given) For square matrices n is almost always m.
4218273d9f13SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4219273d9f13SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4220273d9f13SBarry Smith .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4221273d9f13SBarry Smith            (same value is used for all local rows)
4222273d9f13SBarry Smith .  d_nnz - array containing the number of nonzeros in the various rows of the
4223273d9f13SBarry Smith            DIAGONAL portion of the local submatrix (possibly different for each row)
42240298fd71SBarry Smith            or NULL, if d_nz is used to specify the nonzero structure.
4225273d9f13SBarry Smith            The size of this array is equal to the number of local rows, i.e 'm'.
4226273d9f13SBarry Smith .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4227273d9f13SBarry Smith            submatrix (same value is used for all local rows).
4228273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various rows of the
4229273d9f13SBarry Smith            OFF-DIAGONAL portion of the local submatrix (possibly different for
42300298fd71SBarry Smith            each row) or NULL, if o_nz is used to specify the nonzero
4231273d9f13SBarry Smith            structure. The size of this array is equal to the number
4232273d9f13SBarry Smith            of local rows, i.e 'm'.
4233273d9f13SBarry Smith 
4234273d9f13SBarry Smith    Output Parameter:
4235273d9f13SBarry Smith .  A - the matrix
4236273d9f13SBarry Smith 
4237175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4238f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4239175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4240175b88e8SBarry Smith 
4241273d9f13SBarry Smith    Notes:
424249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
424349a6f317SBarry Smith 
4244273d9f13SBarry Smith    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4245273d9f13SBarry Smith    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4246273d9f13SBarry Smith    storage requirements for this matrix.
4247273d9f13SBarry Smith 
4248273d9f13SBarry Smith    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4249273d9f13SBarry Smith    processor then it must be used on all processors that share the object for
4250273d9f13SBarry Smith    that argument.
4251273d9f13SBarry Smith 
4252273d9f13SBarry Smith    The user MUST specify either the local or global matrix dimensions
4253273d9f13SBarry Smith    (possibly both).
4254273d9f13SBarry Smith 
425533a7c187SSatish Balay    The parallel matrix is partitioned across processors such that the
425633a7c187SSatish Balay    first m0 rows belong to process 0, the next m1 rows belong to
425733a7c187SSatish Balay    process 1, the next m2 rows belong to process 2 etc.. where
425833a7c187SSatish Balay    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
425933a7c187SSatish Balay    values corresponding to [m x N] submatrix.
4260273d9f13SBarry Smith 
426133a7c187SSatish Balay    The columns are logically partitioned with the n0 columns belonging
426233a7c187SSatish Balay    to 0th partition, the next n1 columns belonging to the next
4263df3898eeSBarry Smith    partition etc.. where n0,n1,n2... are the input parameter 'n'.
426433a7c187SSatish Balay 
426533a7c187SSatish Balay    The DIAGONAL portion of the local submatrix on any given processor
426633a7c187SSatish Balay    is the submatrix corresponding to the rows and columns m,n
426733a7c187SSatish Balay    corresponding to the given processor. i.e diagonal matrix on
426833a7c187SSatish Balay    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
426933a7c187SSatish Balay    etc. The remaining portion of the local submatrix [m x (N-n)]
427033a7c187SSatish Balay    constitute the OFF-DIAGONAL portion. The example below better
427133a7c187SSatish Balay    illustrates this concept.
427233a7c187SSatish Balay 
427333a7c187SSatish Balay    For a square global matrix we define each processor's diagonal portion
427433a7c187SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
427533a7c187SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
427633a7c187SSatish Balay    local matrix (a rectangular submatrix).
4277273d9f13SBarry Smith 
4278273d9f13SBarry Smith    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4279273d9f13SBarry Smith 
428097d05335SKris Buschelman    When calling this routine with a single process communicator, a matrix of
428197d05335SKris Buschelman    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4282da57b5cdSKarl Rupp    type of communicator, use the construction mechanism
4283da57b5cdSKarl Rupp .vb
428478102f6cSMatthew Knepley      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4285da57b5cdSKarl Rupp .ve
428697d05335SKris Buschelman 
4287f1058c0fSBarry Smith $     MatCreate(...,&A);
4288f1058c0fSBarry Smith $     MatSetType(A,MATMPIAIJ);
4289f1058c0fSBarry Smith $     MatSetSizes(A, m,n,M,N);
4290f1058c0fSBarry Smith $     MatMPIAIJSetPreallocation(A,...);
4291f1058c0fSBarry Smith 
4292273d9f13SBarry Smith    By default, this format uses inodes (identical nodes) when possible.
4293273d9f13SBarry Smith    We search for consecutive rows with the same nonzero structure, thereby
4294273d9f13SBarry Smith    reusing matrix information to achieve increased efficiency.
4295273d9f13SBarry Smith 
4296273d9f13SBarry Smith    Options Database Keys:
4297923f20ffSKris Buschelman +  -mat_no_inode  - Do not use inodes
42982f3b2168SJunchao Zhang .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
42992f3b2168SJunchao Zhang -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
43002f3b2168SJunchao Zhang         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
43012f3b2168SJunchao Zhang         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
430247b2e64bSBarry Smith 
4303273d9f13SBarry Smith    Example usage:
4304273d9f13SBarry Smith 
4305273d9f13SBarry Smith    Consider the following 8x8 matrix with 34 non-zero values, that is
4306273d9f13SBarry Smith    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4307273d9f13SBarry Smith    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4308efc377ccSKarl Rupp    as follows
4309273d9f13SBarry Smith 
4310273d9f13SBarry Smith .vb
4311273d9f13SBarry Smith             1  2  0  |  0  3  0  |  0  4
4312273d9f13SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
4313273d9f13SBarry Smith             9  0 10  | 11  0  0  | 12  0
4314273d9f13SBarry Smith     -------------------------------------
4315273d9f13SBarry Smith            13  0 14  | 15 16 17  |  0  0
4316273d9f13SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
4317273d9f13SBarry Smith             0  0  0  | 22 23  0  | 24  0
4318273d9f13SBarry Smith     -------------------------------------
4319273d9f13SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
4320273d9f13SBarry Smith            30  0  0  | 31 32 33  |  0 34
4321273d9f13SBarry Smith .ve
4322273d9f13SBarry Smith 
4323da57b5cdSKarl Rupp    This can be represented as a collection of submatrices as
4324273d9f13SBarry Smith 
4325273d9f13SBarry Smith .vb
4326273d9f13SBarry Smith       A B C
4327273d9f13SBarry Smith       D E F
4328273d9f13SBarry Smith       G H I
4329273d9f13SBarry Smith .ve
4330273d9f13SBarry Smith 
4331273d9f13SBarry Smith    Where the submatrices A,B,C are owned by proc0, D,E,F are
4332273d9f13SBarry Smith    owned by proc1, G,H,I are owned by proc2.
4333273d9f13SBarry Smith 
4334273d9f13SBarry Smith    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4335273d9f13SBarry Smith    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4336273d9f13SBarry Smith    The 'M','N' parameters are 8,8, and have the same values on all procs.
4337273d9f13SBarry Smith 
4338273d9f13SBarry Smith    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4339273d9f13SBarry Smith    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4340273d9f13SBarry Smith    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4341273d9f13SBarry Smith    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4344273d9f13SBarry Smith 
4345273d9f13SBarry Smith    When d_nz, o_nz parameters are specified, d_nz storage elements are
4346273d9f13SBarry Smith    allocated for every row of the local diagonal submatrix, and o_nz
4347273d9f13SBarry Smith    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4349273d9f13SBarry Smith    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4350da57b5cdSKarl Rupp    In this case, the values of d_nz,o_nz are
4351273d9f13SBarry Smith .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4355273d9f13SBarry Smith .ve
4356273d9f13SBarry Smith    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4357273d9f13SBarry Smith    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4359273d9f13SBarry Smith    34 values.
4360273d9f13SBarry Smith 
4361273d9f13SBarry Smith    When d_nnz, o_nnz parameters are specified, the storage is specified
4362a5b23f4aSJose E. Roman    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4363da57b5cdSKarl Rupp    In the above case the values for d_nnz,o_nnz are
4364273d9f13SBarry Smith .vb
4365273d9f13SBarry Smith      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4366273d9f13SBarry Smith      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4367273d9f13SBarry Smith      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4368273d9f13SBarry Smith .ve
4369273d9f13SBarry Smith    Here the space allocated is sum of all the above values i.e 34, and
4370273d9f13SBarry Smith    hence pre-allocation is perfect.
4371273d9f13SBarry Smith 
4372273d9f13SBarry Smith    Level: intermediate
4373273d9f13SBarry Smith 
4374ccd8e176SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
43755f4d30c4SBarry Smith           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4376273d9f13SBarry Smith @*/
437769b1f4b7SBarry Smith PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4378273d9f13SBarry Smith {
4379b1d57f15SBarry Smith   PetscMPIInt    size;
4380273d9f13SBarry Smith 
4381273d9f13SBarry Smith   PetscFunctionBegin;
43829566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,A));
43839566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A,m,n,M,N));
43849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
4385273d9f13SBarry Smith   if (size > 1) {
43869566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A,MATMPIAIJ));
43879566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4388273d9f13SBarry Smith   } else {
43899566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A,MATSEQAIJ));
43909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4391273d9f13SBarry Smith   }
4392273d9f13SBarry Smith   PetscFunctionReturn(0);
4393273d9f13SBarry Smith }
4394195d93cdSBarry Smith 
4395127ca0efSMatthew Knepley /*@C
4396127ca0efSMatthew Knepley   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4397127ca0efSMatthew Knepley 
4398127ca0efSMatthew Knepley   Not collective
4399127ca0efSMatthew Knepley 
4400127ca0efSMatthew Knepley   Input Parameter:
4401127ca0efSMatthew Knepley . A - The MPIAIJ matrix
4402127ca0efSMatthew Knepley 
4403127ca0efSMatthew Knepley   Output Parameters:
4404127ca0efSMatthew Knepley + Ad - The local diagonal block as a SeqAIJ matrix
4405127ca0efSMatthew Knepley . Ao - The local off-diagonal block as a SeqAIJ matrix
4406127ca0efSMatthew Knepley - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4407127ca0efSMatthew Knepley 
4408127ca0efSMatthew Knepley   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4410127ca0efSMatthew Knepley   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4411127ca0efSMatthew Knepley   local column numbers to global column numbers in the original matrix.
4412127ca0efSMatthew Knepley 
4413127ca0efSMatthew Knepley   Level: intermediate
4414127ca0efSMatthew Knepley 
4415c3ca5d0dSPierre Jolivet .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4416127ca0efSMatthew Knepley @*/
44179230625dSJed Brown PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4418195d93cdSBarry Smith {
4419195d93cdSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
442004cf37c7SBarry Smith   PetscBool      flg;
4421b1d57f15SBarry Smith 
4422195d93cdSBarry Smith   PetscFunctionBegin;
44239566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
442428b400f6SJacob Faibussowitsch   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
442521e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
442621e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
442721e72a00SBarry Smith   if (colmap) *colmap = a->garray;
4428195d93cdSBarry Smith   PetscFunctionReturn(0);
4429195d93cdSBarry Smith }
4430a2243be0SBarry Smith 
4431110bb6e1SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
44329b8102ccSHong Zhang {
4433110bb6e1SHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii;
44349b8102ccSHong Zhang   PetscInt       *indx;
4435110bb6e1SHong Zhang   PetscScalar    *values;
4436421ddf4dSJunchao Zhang   MatType        rootType;
44379b8102ccSHong Zhang 
44389b8102ccSHong Zhang   PetscFunctionBegin;
44399566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat,&m,&N));
4440110bb6e1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4441110bb6e1SHong Zhang     PetscInt       *dnz,*onz,sum,bs,cbs;
4442110bb6e1SHong Zhang 
44439b8102ccSHong Zhang     if (n == PETSC_DECIDE) {
44449566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnership(comm,&n,&N));
44459b8102ccSHong Zhang     }
4446a22543b6SHong Zhang     /* Check sum(n) = N */
44471c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
444808401ef6SPierre Jolivet     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4449a22543b6SHong Zhang 
44509566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
44519b8102ccSHong Zhang     rstart -= m;
44529b8102ccSHong Zhang 
4453d0609cedSBarry Smith     MatPreallocateBegin(comm,m,n,dnz,onz);
44549b8102ccSHong Zhang     for (i=0; i<m; i++) {
44559566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
44569566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
44579566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
44589b8102ccSHong Zhang     }
44599b8102ccSHong Zhang 
44609566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm,outmat));
44619566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
44629566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
44639566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
44649566063dSJacob Faibussowitsch     PetscCall(MatGetRootType_Private(inmat,&rootType));
44659566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat,rootType));
44669566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
44679566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4468d0609cedSBarry Smith     MatPreallocateEnd(dnz,onz);
44699566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
44709b8102ccSHong Zhang   }
44719b8102ccSHong Zhang 
4472110bb6e1SHong Zhang   /* numeric phase */
44739566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
44749b8102ccSHong Zhang   for (i=0; i<m; i++) {
44759566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
44769b8102ccSHong Zhang     Ii   = i + rstart;
44779566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
44789566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
44799b8102ccSHong Zhang   }
44809566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
44819566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4482c5d6d63eSBarry Smith   PetscFunctionReturn(0);
4483c5d6d63eSBarry Smith }
4484c5d6d63eSBarry Smith 
4485dfbe8321SBarry Smith PetscErrorCode MatFileSplit(Mat A,char *outfile)
4486c5d6d63eSBarry Smith {
448732dcc486SBarry Smith   PetscMPIInt       rank;
4488b1d57f15SBarry Smith   PetscInt          m,N,i,rstart,nnz;
4489de4209c5SBarry Smith   size_t            len;
4490b1d57f15SBarry Smith   const PetscInt    *indx;
4491c5d6d63eSBarry Smith   PetscViewer       out;
4492c5d6d63eSBarry Smith   char              *name;
4493c5d6d63eSBarry Smith   Mat               B;
4494b3cc6726SBarry Smith   const PetscScalar *values;
4495c5d6d63eSBarry Smith 
4496c5d6d63eSBarry Smith   PetscFunctionBegin;
44979566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A,&m,NULL));
44989566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A,NULL,&N));
4499f204ca49SKris Buschelman   /* Should this be the type of the diagonal block of A? */
45009566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
45019566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,m,N,m,N));
45029566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(B,A,A));
45039566063dSJacob Faibussowitsch   PetscCall(MatSetType(B,MATSEQAIJ));
45049566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
45059566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4506c5d6d63eSBarry Smith   for (i=0; i<m; i++) {
45079566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
45089566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
45099566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4510c5d6d63eSBarry Smith   }
45119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
45129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4513c5d6d63eSBarry Smith 
45149566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
45159566063dSJacob Faibussowitsch   PetscCall(PetscStrlen(outfile,&len));
45169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(len+6,&name));
45179566063dSJacob Faibussowitsch   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
45189566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
45199566063dSJacob Faibussowitsch   PetscCall(PetscFree(name));
45209566063dSJacob Faibussowitsch   PetscCall(MatView(B,out));
45219566063dSJacob Faibussowitsch   PetscCall(PetscViewerDestroy(&out));
45229566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
4523c5d6d63eSBarry Smith   PetscFunctionReturn(0);
4524c5d6d63eSBarry Smith }
4525e5f2cdd8SHong Zhang 
45266718818eSStefano Zampini static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
452751a7d1a8SHong Zhang {
45286718818eSStefano Zampini   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
452951a7d1a8SHong Zhang 
453051a7d1a8SHong Zhang   PetscFunctionBegin;
45316718818eSStefano Zampini   if (!merge) PetscFunctionReturn(0);
45329566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->id_r));
45339566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_s));
45349566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_r));
45359566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bi));
45369566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bj));
45379566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri[0]));
45389566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri));
45399566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj[0]));
45409566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj));
45419566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coi));
45429566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coj));
45439566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->owners_co));
45449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&merge->rowmap));
45459566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge));
454651a7d1a8SHong Zhang   PetscFunctionReturn(0);
454751a7d1a8SHong Zhang }
454851a7d1a8SHong Zhang 
4549c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
4550c6db04a5SJed Brown #include <petscbt.h>
45514ebed01fSBarry Smith 
455290431a8fSHong Zhang PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
455355d1abb9SHong Zhang {
4554ce94432eSBarry Smith   MPI_Comm            comm;
455555d1abb9SHong Zhang   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4556b1d57f15SBarry Smith   PetscMPIInt         size,rank,taga,*len_s;
4557a2ea699eSBarry Smith   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4558b1d57f15SBarry Smith   PetscInt            proc,m;
4559b1d57f15SBarry Smith   PetscInt            **buf_ri,**buf_rj;
4560b1d57f15SBarry Smith   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4561b1d57f15SBarry Smith   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
456255d1abb9SHong Zhang   MPI_Request         *s_waits,*r_waits;
456355d1abb9SHong Zhang   MPI_Status          *status;
4564fff043a9SJunchao Zhang   const MatScalar     *aa,*a_a;
4565dd6ea824SBarry Smith   MatScalar           **abuf_r,*ba_i;
456655d1abb9SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4567776b82aeSLisandro Dalcin   PetscContainer      container;
456855d1abb9SHong Zhang 
456955d1abb9SHong Zhang   PetscFunctionBegin;
45709566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
45719566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
45723c2c1871SHong Zhang 
45739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
45749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm,&rank));
457555d1abb9SHong Zhang 
45769566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
457728b400f6SJacob Faibussowitsch   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
45789566063dSJacob Faibussowitsch   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
45799566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4580fff043a9SJunchao Zhang   aa   = a_a;
4581bf0cc555SLisandro Dalcin 
458255d1abb9SHong Zhang   bi     = merge->bi;
458355d1abb9SHong Zhang   bj     = merge->bj;
458455d1abb9SHong Zhang   buf_ri = merge->buf_ri;
458555d1abb9SHong Zhang   buf_rj = merge->buf_rj;
458655d1abb9SHong Zhang 
45879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size,&status));
45887a2fc3feSBarry Smith   owners = merge->rowmap->range;
458955d1abb9SHong Zhang   len_s  = merge->len_s;
459055d1abb9SHong Zhang 
459155d1abb9SHong Zhang   /* send and recv matrix values */
459255d1abb9SHong Zhang   /*-----------------------------*/
45939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
45949566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
459555d1abb9SHong Zhang 
45969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
459755d1abb9SHong Zhang   for (proc=0,k=0; proc<size; proc++) {
459855d1abb9SHong Zhang     if (!len_s[proc]) continue;
459955d1abb9SHong Zhang     i    = owners[proc];
46009566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
460155d1abb9SHong Zhang     k++;
460255d1abb9SHong Zhang   }
460355d1abb9SHong Zhang 
46049566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
46059566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
46069566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
460755d1abb9SHong Zhang 
46089566063dSJacob Faibussowitsch   PetscCall(PetscFree(s_waits));
46099566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_waits));
461055d1abb9SHong Zhang 
461155d1abb9SHong Zhang   /* insert mat values of mpimat */
461255d1abb9SHong Zhang   /*----------------------------*/
46139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N,&ba_i));
46149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
461555d1abb9SHong Zhang 
461655d1abb9SHong Zhang   for (k=0; k<merge->nrecv; k++) {
461755d1abb9SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
461855d1abb9SHong Zhang     nrows       = *(buf_ri_k[k]);
461955d1abb9SHong Zhang     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4620a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
462155d1abb9SHong Zhang   }
462255d1abb9SHong Zhang 
462355d1abb9SHong Zhang   /* set values of ba */
46247a2fc3feSBarry Smith   m    = merge->rowmap->n;
462555d1abb9SHong Zhang   for (i=0; i<m; i++) {
462655d1abb9SHong Zhang     arow = owners[rank] + i;
462755d1abb9SHong Zhang     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
462855d1abb9SHong Zhang     bnzi = bi[i+1] - bi[i];
46299566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ba_i,bnzi));
463055d1abb9SHong Zhang 
463155d1abb9SHong Zhang     /* add local non-zero vals of this proc's seqmat into ba */
463255d1abb9SHong Zhang     anzi   = ai[arow+1] - ai[arow];
463355d1abb9SHong Zhang     aj     = a->j + ai[arow];
4634fff043a9SJunchao Zhang     aa     = a_a + ai[arow];
463555d1abb9SHong Zhang     nextaj = 0;
463655d1abb9SHong Zhang     for (j=0; nextaj<anzi; j++) {
463755d1abb9SHong Zhang       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
463855d1abb9SHong Zhang         ba_i[j] += aa[nextaj++];
463955d1abb9SHong Zhang       }
464055d1abb9SHong Zhang     }
464155d1abb9SHong Zhang 
464255d1abb9SHong Zhang     /* add received vals into ba */
464355d1abb9SHong Zhang     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
464455d1abb9SHong Zhang       /* i-th row */
464555d1abb9SHong Zhang       if (i == *nextrow[k]) {
464655d1abb9SHong Zhang         anzi   = *(nextai[k]+1) - *nextai[k];
464755d1abb9SHong Zhang         aj     = buf_rj[k] + *(nextai[k]);
464855d1abb9SHong Zhang         aa     = abuf_r[k] + *(nextai[k]);
464955d1abb9SHong Zhang         nextaj = 0;
465055d1abb9SHong Zhang         for (j=0; nextaj<anzi; j++) {
465155d1abb9SHong Zhang           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
465255d1abb9SHong Zhang             ba_i[j] += aa[nextaj++];
465355d1abb9SHong Zhang           }
465455d1abb9SHong Zhang         }
465555d1abb9SHong Zhang         nextrow[k]++; nextai[k]++;
465655d1abb9SHong Zhang       }
465755d1abb9SHong Zhang     }
46589566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
465955d1abb9SHong Zhang   }
46609566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
46619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
46629566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
466355d1abb9SHong Zhang 
46649566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r[0]));
46659566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r));
46669566063dSJacob Faibussowitsch   PetscCall(PetscFree(ba_i));
46679566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
46689566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
466955d1abb9SHong Zhang   PetscFunctionReturn(0);
467055d1abb9SHong Zhang }
467138f152feSBarry Smith 
467290431a8fSHong Zhang PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4673e5f2cdd8SHong Zhang {
467455a3bba9SHong Zhang   Mat                 B_mpi;
4675c2234fe3SHong Zhang   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4676b1d57f15SBarry Smith   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4677b1d57f15SBarry Smith   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4678d0f46423SBarry Smith   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4679a2f3521dSMark F. Adams   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4680b1d57f15SBarry Smith   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4681b1d57f15SBarry Smith   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
468255d1abb9SHong Zhang   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
468358cb9c82SHong Zhang   MPI_Status          *status;
46840298fd71SBarry Smith   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4685be0fcf8dSHong Zhang   PetscBT             lnkbt;
468651a7d1a8SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4687776b82aeSLisandro Dalcin   PetscContainer      container;
468802c68681SHong Zhang 
4689e5f2cdd8SHong Zhang   PetscFunctionBegin;
46909566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
46913c2c1871SHong Zhang 
469238f152feSBarry Smith   /* make sure it is a PETSc comm */
46939566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
46949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
46959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm,&rank));
469655d1abb9SHong Zhang 
46979566063dSJacob Faibussowitsch   PetscCall(PetscNew(&merge));
46989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size,&status));
4699e5f2cdd8SHong Zhang 
47006abd8857SHong Zhang   /* determine row ownership */
4701f08fae4eSHong Zhang   /*---------------------------------------------------------*/
47029566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
47039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
47049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
47059566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
47069566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(merge->rowmap));
47079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size,&len_si));
47089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size,&merge->len_s));
470955d1abb9SHong Zhang 
47107a2fc3feSBarry Smith   m      = merge->rowmap->n;
47117a2fc3feSBarry Smith   owners = merge->rowmap->range;
47126abd8857SHong Zhang 
47136abd8857SHong Zhang   /* determine the number of messages to send, their lengths */
47146abd8857SHong Zhang   /*---------------------------------------------------------*/
47153e06a4e6SHong Zhang   len_s = merge->len_s;
471651a7d1a8SHong Zhang 
47172257cef7SHong Zhang   len          = 0; /* length of buf_si[] */
4718c2234fe3SHong Zhang   merge->nsend = 0;
4719409913e3SHong Zhang   for (proc=0; proc<size; proc++) {
47202257cef7SHong Zhang     len_si[proc] = 0;
47213e06a4e6SHong Zhang     if (proc == rank) {
47226abd8857SHong Zhang       len_s[proc] = 0;
47233e06a4e6SHong Zhang     } else {
472402c68681SHong Zhang       len_si[proc] = owners[proc+1] - owners[proc] + 1;
47253e06a4e6SHong Zhang       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
47263e06a4e6SHong Zhang     }
47273e06a4e6SHong Zhang     if (len_s[proc]) {
4728c2234fe3SHong Zhang       merge->nsend++;
47292257cef7SHong Zhang       nrows = 0;
47302257cef7SHong Zhang       for (i=owners[proc]; i<owners[proc+1]; i++) {
47312257cef7SHong Zhang         if (ai[i+1] > ai[i]) nrows++;
47322257cef7SHong Zhang       }
47332257cef7SHong Zhang       len_si[proc] = 2*(nrows+1);
47342257cef7SHong Zhang       len         += len_si[proc];
4735409913e3SHong Zhang     }
473658cb9c82SHong Zhang   }
4737409913e3SHong Zhang 
47382257cef7SHong Zhang   /* determine the number and length of messages to receive for ij-structure */
47392257cef7SHong Zhang   /*-------------------------------------------------------------------------*/
47409566063dSJacob Faibussowitsch   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
47419566063dSJacob Faibussowitsch   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4742671beff6SHong Zhang 
47433e06a4e6SHong Zhang   /* post the Irecv of j-structure */
47443e06a4e6SHong Zhang   /*-------------------------------*/
47459566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm,&tagj));
47469566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
474702c68681SHong Zhang 
47483e06a4e6SHong Zhang   /* post the Isend of j-structure */
4749affca5deSHong Zhang   /*--------------------------------*/
47509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
47513e06a4e6SHong Zhang 
47522257cef7SHong Zhang   for (proc=0, k=0; proc<size; proc++) {
4753409913e3SHong Zhang     if (!len_s[proc]) continue;
475402c68681SHong Zhang     i    = owners[proc];
47559566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
475651a7d1a8SHong Zhang     k++;
475751a7d1a8SHong Zhang   }
475851a7d1a8SHong Zhang 
47593e06a4e6SHong Zhang   /* receives and sends of j-structure are complete */
47603e06a4e6SHong Zhang   /*------------------------------------------------*/
47619566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
47629566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
476302c68681SHong Zhang 
476402c68681SHong Zhang   /* send and recv i-structure */
476502c68681SHong Zhang   /*---------------------------*/
47669566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm,&tagi));
47679566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
476802c68681SHong Zhang 
47699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(len+1,&buf_s));
47703e06a4e6SHong Zhang   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
47712257cef7SHong Zhang   for (proc=0,k=0; proc<size; proc++) {
477202c68681SHong Zhang     if (!len_s[proc]) continue;
47733e06a4e6SHong Zhang     /* form outgoing message for i-structure:
47743e06a4e6SHong Zhang          buf_si[0]:                 nrows to be sent
47753e06a4e6SHong Zhang                [1:nrows]:           row index (global)
47763e06a4e6SHong Zhang                [nrows+1:2*nrows+1]: i-structure index
47773e06a4e6SHong Zhang     */
47783e06a4e6SHong Zhang     /*-------------------------------------------*/
47792257cef7SHong Zhang     nrows       = len_si[proc]/2 - 1;
47803e06a4e6SHong Zhang     buf_si_i    = buf_si + nrows+1;
47813e06a4e6SHong Zhang     buf_si[0]   = nrows;
47823e06a4e6SHong Zhang     buf_si_i[0] = 0;
47833e06a4e6SHong Zhang     nrows       = 0;
47843e06a4e6SHong Zhang     for (i=owners[proc]; i<owners[proc+1]; i++) {
47853e06a4e6SHong Zhang       anzi = ai[i+1] - ai[i];
47863e06a4e6SHong Zhang       if (anzi) {
47873e06a4e6SHong Zhang         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
47883e06a4e6SHong Zhang         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
47893e06a4e6SHong Zhang         nrows++;
47903e06a4e6SHong Zhang       }
47913e06a4e6SHong Zhang     }
47929566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
479302c68681SHong Zhang     k++;
47942257cef7SHong Zhang     buf_si += len_si[proc];
479502c68681SHong Zhang   }
47962257cef7SHong Zhang 
47979566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
47989566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
479902c68681SHong Zhang 
48009566063dSJacob Faibussowitsch   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
48013e06a4e6SHong Zhang   for (i=0; i<merge->nrecv; i++) {
48029566063dSJacob Faibussowitsch     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
48033e06a4e6SHong Zhang   }
48043e06a4e6SHong Zhang 
48059566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_si));
48069566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_ri));
48079566063dSJacob Faibussowitsch   PetscCall(PetscFree(rj_waits));
48089566063dSJacob Faibussowitsch   PetscCall(PetscFree2(si_waits,sj_waits));
48099566063dSJacob Faibussowitsch   PetscCall(PetscFree(ri_waits));
48109566063dSJacob Faibussowitsch   PetscCall(PetscFree(buf_s));
48119566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
481258cb9c82SHong Zhang 
4813bcc1bcd5SHong Zhang   /* compute a local seq matrix in each processor */
4814bcc1bcd5SHong Zhang   /*----------------------------------------------*/
481558cb9c82SHong Zhang   /* allocate bi array and free space for accumulating nonzero column info */
48169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m+1,&bi));
481758cb9c82SHong Zhang   bi[0] = 0;
481858cb9c82SHong Zhang 
4819be0fcf8dSHong Zhang   /* create and initialize a linked list */
4820be0fcf8dSHong Zhang   nlnk = N+1;
48219566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
482258cb9c82SHong Zhang 
4823bcc1bcd5SHong Zhang   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4824bcc1bcd5SHong Zhang   len  = ai[owners[rank+1]] - ai[owners[rank]];
48259566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
48262205254eSKarl Rupp 
482758cb9c82SHong Zhang   current_space = free_space;
482858cb9c82SHong Zhang 
4829bcc1bcd5SHong Zhang   /* determine symbolic info for each local row */
48309566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
48311d79065fSBarry Smith 
48323e06a4e6SHong Zhang   for (k=0; k<merge->nrecv; k++) {
48332257cef7SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
48343e06a4e6SHong Zhang     nrows       = *buf_ri_k[k];
48353e06a4e6SHong Zhang     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4836a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
48373e06a4e6SHong Zhang   }
48382257cef7SHong Zhang 
4839d0609cedSBarry Smith   MatPreallocateBegin(comm,m,n,dnz,onz);
4840bcc1bcd5SHong Zhang   len  = 0;
484158cb9c82SHong Zhang   for (i=0; i<m; i++) {
484258cb9c82SHong Zhang     bnzi = 0;
484358cb9c82SHong Zhang     /* add local non-zero cols of this proc's seqmat into lnk */
484458cb9c82SHong Zhang     arow  = owners[rank] + i;
484558cb9c82SHong Zhang     anzi  = ai[arow+1] - ai[arow];
484658cb9c82SHong Zhang     aj    = a->j + ai[arow];
48479566063dSJacob Faibussowitsch     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
484858cb9c82SHong Zhang     bnzi += nlnk;
484958cb9c82SHong Zhang     /* add received col data into lnk */
485051a7d1a8SHong Zhang     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
485155d1abb9SHong Zhang       if (i == *nextrow[k]) { /* i-th row */
48523e06a4e6SHong Zhang         anzi  = *(nextai[k]+1) - *nextai[k];
48533e06a4e6SHong Zhang         aj    = buf_rj[k] + *nextai[k];
48549566063dSJacob Faibussowitsch         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
48553e06a4e6SHong Zhang         bnzi += nlnk;
48563e06a4e6SHong Zhang         nextrow[k]++; nextai[k]++;
48573e06a4e6SHong Zhang       }
485858cb9c82SHong Zhang     }
4859bcc1bcd5SHong Zhang     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
486058cb9c82SHong Zhang 
486158cb9c82SHong Zhang     /* if free space is not available, make more free space */
486258cb9c82SHong Zhang     if (current_space->local_remaining<bnzi) {
48639566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
486458cb9c82SHong Zhang       nspacedouble++;
486558cb9c82SHong Zhang     }
486658cb9c82SHong Zhang     /* copy data into free space, then initialize lnk */
48679566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
48689566063dSJacob Faibussowitsch     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4869bcc1bcd5SHong Zhang 
487058cb9c82SHong Zhang     current_space->array           += bnzi;
487158cb9c82SHong Zhang     current_space->local_used      += bnzi;
487258cb9c82SHong Zhang     current_space->local_remaining -= bnzi;
487358cb9c82SHong Zhang 
487458cb9c82SHong Zhang     bi[i+1] = bi[i] + bnzi;
487558cb9c82SHong Zhang   }
4876bcc1bcd5SHong Zhang 
48779566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4878bcc1bcd5SHong Zhang 
48799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[m]+1,&bj));
48809566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
48819566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk,lnkbt));
4882409913e3SHong Zhang 
4883bcc1bcd5SHong Zhang   /* create symbolic parallel matrix B_mpi */
4884bcc1bcd5SHong Zhang   /*---------------------------------------*/
48859566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
48869566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,&B_mpi));
488754b84b50SHong Zhang   if (n==PETSC_DECIDE) {
48889566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
488954b84b50SHong Zhang   } else {
48909566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
489154b84b50SHong Zhang   }
48929566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
48939566063dSJacob Faibussowitsch   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
48949566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4895d0609cedSBarry Smith   MatPreallocateEnd(dnz,onz);
48969566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
489758cb9c82SHong Zhang 
489890431a8fSHong Zhang   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
48996abd8857SHong Zhang   B_mpi->assembled  = PETSC_FALSE;
4900affca5deSHong Zhang   merge->bi         = bi;
4901affca5deSHong Zhang   merge->bj         = bj;
490202c68681SHong Zhang   merge->buf_ri     = buf_ri;
490302c68681SHong Zhang   merge->buf_rj     = buf_rj;
49040298fd71SBarry Smith   merge->coi        = NULL;
49050298fd71SBarry Smith   merge->coj        = NULL;
49060298fd71SBarry Smith   merge->owners_co  = NULL;
4907affca5deSHong Zhang 
49089566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&comm));
4909bf0cc555SLisandro Dalcin 
4910affca5deSHong Zhang   /* attach the supporting struct to B_mpi for reuse */
49119566063dSJacob Faibussowitsch   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
49129566063dSJacob Faibussowitsch   PetscCall(PetscContainerSetPointer(container,merge));
49139566063dSJacob Faibussowitsch   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
49149566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
49159566063dSJacob Faibussowitsch   PetscCall(PetscContainerDestroy(&container));
4916affca5deSHong Zhang   *mpimat = B_mpi;
491738f152feSBarry Smith 
49189566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
4919e5f2cdd8SHong Zhang   PetscFunctionReturn(0);
4920e5f2cdd8SHong Zhang }
492125616d81SHong Zhang 
4922d4036a1aSHong Zhang /*@C
49235f4d30c4SBarry Smith       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4924d4036a1aSHong Zhang                  matrices from each processor
4925d4036a1aSHong Zhang 
4926d083f849SBarry Smith     Collective
4927d4036a1aSHong Zhang 
4928d4036a1aSHong Zhang    Input Parameters:
4929d4036a1aSHong Zhang +    comm - the communicators the parallel matrix will live on
4930d4036a1aSHong Zhang .    seqmat - the input sequential matrices
4931d4036a1aSHong Zhang .    m - number of local rows (or PETSC_DECIDE)
4932d4036a1aSHong Zhang .    n - number of local columns (or PETSC_DECIDE)
4933d4036a1aSHong Zhang -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4934d4036a1aSHong Zhang 
4935d4036a1aSHong Zhang    Output Parameter:
4936d4036a1aSHong Zhang .    mpimat - the parallel matrix generated
4937d4036a1aSHong Zhang 
4938d4036a1aSHong Zhang     Level: advanced
4939d4036a1aSHong Zhang 
4940d4036a1aSHong Zhang    Notes:
4941d4036a1aSHong Zhang      The dimensions of the sequential matrix in each processor MUST be the same.
4942d4036a1aSHong Zhang      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4943d4036a1aSHong Zhang      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4944d4036a1aSHong Zhang @*/
494590431a8fSHong Zhang PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
494655d1abb9SHong Zhang {
49477e63b356SHong Zhang   PetscMPIInt    size;
494855d1abb9SHong Zhang 
494955d1abb9SHong Zhang   PetscFunctionBegin;
49509566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
49517e63b356SHong Zhang   if (size == 1) {
49529566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
49537e63b356SHong Zhang     if (scall == MAT_INITIAL_MATRIX) {
49549566063dSJacob Faibussowitsch       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
49557e63b356SHong Zhang     } else {
49569566063dSJacob Faibussowitsch       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
49577e63b356SHong Zhang     }
49589566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
49597e63b356SHong Zhang     PetscFunctionReturn(0);
49607e63b356SHong Zhang   }
49619566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
496255d1abb9SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
49639566063dSJacob Faibussowitsch     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
496455d1abb9SHong Zhang   }
49659566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
49669566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
496755d1abb9SHong Zhang   PetscFunctionReturn(0);
496855d1abb9SHong Zhang }
49694ebed01fSBarry Smith 
4970bc08b0f1SBarry Smith /*@
4971ef76dfe8SJed Brown      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
49728661ff28SBarry Smith           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
49738661ff28SBarry Smith           with MatGetSize()
497425616d81SHong Zhang 
497532fba14fSHong Zhang     Not Collective
497625616d81SHong Zhang 
497725616d81SHong Zhang    Input Parameters:
497825616d81SHong Zhang +    A - the matrix
4979a2b725a8SWilliam Gropp -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
498025616d81SHong Zhang 
498125616d81SHong Zhang    Output Parameter:
498225616d81SHong Zhang .    A_loc - the local sequential matrix generated
498325616d81SHong Zhang 
498425616d81SHong Zhang     Level: developer
498525616d81SHong Zhang 
498677c65a98SStefano Zampini    Notes:
498777c65a98SStefano Zampini      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
498877c65a98SStefano Zampini      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
498977c65a98SStefano Zampini      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
499077c65a98SStefano Zampini      modify the values of the returned A_loc.
499177c65a98SStefano Zampini 
4992ed502f03SStefano Zampini .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
499325616d81SHong Zhang @*/
49944a2b5492SBarry Smith PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
499525616d81SHong Zhang {
499601b7ae99SHong Zhang   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
4997b78526a6SJose E. Roman   Mat_SeqAIJ        *mat,*a,*b;
4998b78526a6SJose E. Roman   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4999ce496241SStefano Zampini   const PetscScalar *aa,*ba,*aav,*bav;
5000ce496241SStefano Zampini   PetscScalar       *ca,*cam;
500177c65a98SStefano Zampini   PetscMPIInt       size;
5002d0f46423SBarry Smith   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
50035a7d977cSHong Zhang   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
50048661ff28SBarry Smith   PetscBool         match;
500525616d81SHong Zhang 
500625616d81SHong Zhang   PetscFunctionBegin;
50079566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
500828b400f6SJacob Faibussowitsch   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
50099566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
501077c65a98SStefano Zampini   if (size == 1) {
501177c65a98SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
50129566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
501377c65a98SStefano Zampini       *A_loc = mpimat->A;
501477c65a98SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
50159566063dSJacob Faibussowitsch       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
501677c65a98SStefano Zampini     }
501777c65a98SStefano Zampini     PetscFunctionReturn(0);
501877c65a98SStefano Zampini   }
501970a9ba44SHong Zhang 
50209566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5021b78526a6SJose E. Roman   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5022b78526a6SJose E. Roman   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5023b78526a6SJose E. Roman   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
50249566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
50259566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5026ce496241SStefano Zampini   aa   = aav;
5027ce496241SStefano Zampini   ba   = bav;
502801b7ae99SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
50299566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1+am,&ci));
5030dea91ad1SHong Zhang     ci[0] = 0;
503101b7ae99SHong Zhang     for (i=0; i<am; i++) {
5032dea91ad1SHong Zhang       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
503301b7ae99SHong Zhang     }
50349566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1+ci[am],&cj));
50359566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1+ci[am],&ca));
5036dea91ad1SHong Zhang     k    = 0;
503701b7ae99SHong Zhang     for (i=0; i<am; i++) {
50385a7d977cSHong Zhang       ncols_o = bi[i+1] - bi[i];
50395a7d977cSHong Zhang       ncols_d = ai[i+1] - ai[i];
504001b7ae99SHong Zhang       /* off-diagonal portion of A */
50415a7d977cSHong Zhang       for (jo=0; jo<ncols_o; jo++) {
50425a7d977cSHong Zhang         col = cmap[*bj];
50435a7d977cSHong Zhang         if (col >= cstart) break;
50445a7d977cSHong Zhang         cj[k]   = col; bj++;
50455a7d977cSHong Zhang         ca[k++] = *ba++;
50465a7d977cSHong Zhang       }
50475a7d977cSHong Zhang       /* diagonal portion of A */
50485a7d977cSHong Zhang       for (j=0; j<ncols_d; j++) {
50495a7d977cSHong Zhang         cj[k]   = cstart + *aj++;
50505a7d977cSHong Zhang         ca[k++] = *aa++;
50515a7d977cSHong Zhang       }
50525a7d977cSHong Zhang       /* off-diagonal portion of A */
50535a7d977cSHong Zhang       for (j=jo; j<ncols_o; j++) {
50545a7d977cSHong Zhang         cj[k]   = cmap[*bj++];
50555a7d977cSHong Zhang         ca[k++] = *ba++;
50565a7d977cSHong Zhang       }
505725616d81SHong Zhang     }
5058dea91ad1SHong Zhang     /* put together the new matrix */
50599566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5060dea91ad1SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5061dea91ad1SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
5062dea91ad1SHong Zhang     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5063e6b907acSBarry Smith     mat->free_a  = PETSC_TRUE;
5064e6b907acSBarry Smith     mat->free_ij = PETSC_TRUE;
5065dea91ad1SHong Zhang     mat->nonew   = 0;
50665a7d977cSHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
50675a7d977cSHong Zhang     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5068fff043a9SJunchao Zhang     ci   = mat->i;
5069fff043a9SJunchao Zhang     cj   = mat->j;
50709566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
50715a7d977cSHong Zhang     for (i=0; i<am; i++) {
50725a7d977cSHong Zhang       /* off-diagonal portion of A */
50735a7d977cSHong Zhang       ncols_o = bi[i+1] - bi[i];
50745a7d977cSHong Zhang       for (jo=0; jo<ncols_o; jo++) {
50755a7d977cSHong Zhang         col = cmap[*bj];
50765a7d977cSHong Zhang         if (col >= cstart) break;
5077a77337e4SBarry Smith         *cam++ = *ba++; bj++;
50785a7d977cSHong Zhang       }
50795a7d977cSHong Zhang       /* diagonal portion of A */
5080ecc9b87dSHong Zhang       ncols_d = ai[i+1] - ai[i];
5081a77337e4SBarry Smith       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
50825a7d977cSHong Zhang       /* off-diagonal portion of A */
5083f33d1a9aSHong Zhang       for (j=jo; j<ncols_o; j++) {
5084a77337e4SBarry Smith         *cam++ = *ba++; bj++;
5085f33d1a9aSHong Zhang       }
50865a7d977cSHong Zhang     }
50879566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
508898921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
50899566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
50909566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
50919566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
509225616d81SHong Zhang   PetscFunctionReturn(0);
509325616d81SHong Zhang }
509425616d81SHong Zhang 
5095ed502f03SStefano Zampini /*@
5096ed502f03SStefano Zampini      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5097ed502f03SStefano Zampini           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5098ed502f03SStefano Zampini 
5099ed502f03SStefano Zampini     Not Collective
5100ed502f03SStefano Zampini 
5101ed502f03SStefano Zampini    Input Parameters:
5102ed502f03SStefano Zampini +    A - the matrix
5103ed502f03SStefano Zampini -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104ed502f03SStefano Zampini 
5105d8d19677SJose E. Roman    Output Parameters:
5106ed502f03SStefano Zampini +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5107ed502f03SStefano Zampini -    A_loc - the local sequential matrix generated
5108ed502f03SStefano Zampini 
5109ed502f03SStefano Zampini     Level: developer
5110ed502f03SStefano Zampini 
5111ed502f03SStefano Zampini    Notes:
5112ec446438SStefano Zampini      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5113ed502f03SStefano Zampini 
5114ed502f03SStefano Zampini .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5115ed502f03SStefano Zampini 
5116ed502f03SStefano Zampini @*/
5117ed502f03SStefano Zampini PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5118ed502f03SStefano Zampini {
5119ed502f03SStefano Zampini   Mat            Ao,Ad;
5120ed502f03SStefano Zampini   const PetscInt *cmap;
5121ed502f03SStefano Zampini   PetscMPIInt    size;
5122ed502f03SStefano Zampini   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5123ed502f03SStefano Zampini 
5124ed502f03SStefano Zampini   PetscFunctionBegin;
51259566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
51269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5127ed502f03SStefano Zampini   if (size == 1) {
5128ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
51299566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)Ad));
5130ed502f03SStefano Zampini       *A_loc = Ad;
5131ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
51329566063dSJacob Faibussowitsch       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5133ed502f03SStefano Zampini     }
51349566063dSJacob Faibussowitsch     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5135ed502f03SStefano Zampini     PetscFunctionReturn(0);
5136ed502f03SStefano Zampini   }
51379566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
51389566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5139ed502f03SStefano Zampini   if (f) {
51409566063dSJacob Faibussowitsch     PetscCall((*f)(A,scall,glob,A_loc));
5141ed502f03SStefano Zampini   } else {
5142ed502f03SStefano Zampini     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5143ed502f03SStefano Zampini     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5144ed502f03SStefano Zampini     Mat_SeqAIJ        *c;
5145ed502f03SStefano Zampini     PetscInt          *ai = a->i, *aj = a->j;
5146ed502f03SStefano Zampini     PetscInt          *bi = b->i, *bj = b->j;
5147ed502f03SStefano Zampini     PetscInt          *ci,*cj;
5148ed502f03SStefano Zampini     const PetscScalar *aa,*ba;
5149ed502f03SStefano Zampini     PetscScalar       *ca;
5150ed502f03SStefano Zampini     PetscInt          i,j,am,dn,on;
5151ed502f03SStefano Zampini 
51529566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ad,&am,&dn));
51539566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ao,NULL,&on));
51549566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
51559566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5156ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
5157ed502f03SStefano Zampini       PetscInt k;
51589566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(1+am,&ci));
51599566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
51609566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5161ed502f03SStefano Zampini       ci[0] = 0;
5162ed502f03SStefano Zampini       for (i=0,k=0; i<am; i++) {
5163ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i+1] - bi[i];
5164ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i+1] - ai[i];
5165ed502f03SStefano Zampini         ci[i+1] = ci[i] + ncols_o + ncols_d;
5166ed502f03SStefano Zampini         /* diagonal portion of A */
5167ed502f03SStefano Zampini         for (j=0; j<ncols_d; j++,k++) {
5168ed502f03SStefano Zampini           cj[k] = *aj++;
5169ed502f03SStefano Zampini           ca[k] = *aa++;
5170ed502f03SStefano Zampini         }
5171ed502f03SStefano Zampini         /* off-diagonal portion of A */
5172ed502f03SStefano Zampini         for (j=0; j<ncols_o; j++,k++) {
5173ed502f03SStefano Zampini           cj[k] = dn + *bj++;
5174ed502f03SStefano Zampini           ca[k] = *ba++;
5175ed502f03SStefano Zampini         }
5176ed502f03SStefano Zampini       }
5177ed502f03SStefano Zampini       /* put together the new matrix */
51789566063dSJacob Faibussowitsch       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5179ed502f03SStefano Zampini       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5180ed502f03SStefano Zampini       /* Since these are PETSc arrays, change flags to free them as necessary. */
5181ed502f03SStefano Zampini       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5182ed502f03SStefano Zampini       c->free_a  = PETSC_TRUE;
5183ed502f03SStefano Zampini       c->free_ij = PETSC_TRUE;
5184ed502f03SStefano Zampini       c->nonew   = 0;
51859566063dSJacob Faibussowitsch       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5186ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
51879566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5188ed502f03SStefano Zampini       for (i=0; i<am; i++) {
5189ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i+1] - ai[i];
5190ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i+1] - bi[i];
5191ed502f03SStefano Zampini         /* diagonal portion of A */
5192ed502f03SStefano Zampini         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5193ed502f03SStefano Zampini         /* off-diagonal portion of A */
5194ed502f03SStefano Zampini         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5195ed502f03SStefano Zampini       }
51969566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
519798921bdaSJacob Faibussowitsch     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
51989566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
51999566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5200ed502f03SStefano Zampini     if (glob) {
5201ed502f03SStefano Zampini       PetscInt cst, *gidx;
5202ed502f03SStefano Zampini 
52039566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
52049566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(dn+on,&gidx));
5205ed502f03SStefano Zampini       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5206ed502f03SStefano Zampini       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
52079566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5208ed502f03SStefano Zampini     }
5209ed502f03SStefano Zampini   }
52109566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5211ed502f03SStefano Zampini   PetscFunctionReturn(0);
5212ed502f03SStefano Zampini }
5213ed502f03SStefano Zampini 
521432fba14fSHong Zhang /*@C
52155f4d30c4SBarry Smith      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
521632fba14fSHong Zhang 
521732fba14fSHong Zhang     Not Collective
521832fba14fSHong Zhang 
521932fba14fSHong Zhang    Input Parameters:
522032fba14fSHong Zhang +    A - the matrix
522132fba14fSHong Zhang .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
52220298fd71SBarry Smith -    row, col - index sets of rows and columns to extract (or NULL)
522332fba14fSHong Zhang 
522432fba14fSHong Zhang    Output Parameter:
522532fba14fSHong Zhang .    A_loc - the local sequential matrix generated
522632fba14fSHong Zhang 
522732fba14fSHong Zhang     Level: developer
522832fba14fSHong Zhang 
5229ba264940SBarry Smith .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5230ba264940SBarry Smith 
523132fba14fSHong Zhang @*/
52324a2b5492SBarry Smith PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
523332fba14fSHong Zhang {
523432fba14fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
523532fba14fSHong Zhang   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
523632fba14fSHong Zhang   IS             isrowa,iscola;
523732fba14fSHong Zhang   Mat            *aloc;
52384a2b5492SBarry Smith   PetscBool      match;
523932fba14fSHong Zhang 
524032fba14fSHong Zhang   PetscFunctionBegin;
52419566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
524228b400f6SJacob Faibussowitsch   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
52439566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
524432fba14fSHong Zhang   if (!row) {
5245d0f46423SBarry Smith     start = A->rmap->rstart; end = A->rmap->rend;
52469566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
524732fba14fSHong Zhang   } else {
524832fba14fSHong Zhang     isrowa = *row;
524932fba14fSHong Zhang   }
525032fba14fSHong Zhang   if (!col) {
5251d0f46423SBarry Smith     start = A->cmap->rstart;
525232fba14fSHong Zhang     cmap  = a->garray;
5253d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5254d0f46423SBarry Smith     nzB   = a->B->cmap->n;
52559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA+nzB, &idx));
525632fba14fSHong Zhang     ncols = 0;
525732fba14fSHong Zhang     for (i=0; i<nzB; i++) {
525832fba14fSHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
525932fba14fSHong Zhang       else break;
526032fba14fSHong Zhang     }
526132fba14fSHong Zhang     imark = i;
526232fba14fSHong Zhang     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
526332fba14fSHong Zhang     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
52649566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
526532fba14fSHong Zhang   } else {
526632fba14fSHong Zhang     iscola = *col;
526732fba14fSHong Zhang   }
526832fba14fSHong Zhang   if (scall != MAT_INITIAL_MATRIX) {
52699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1,&aloc));
527032fba14fSHong Zhang     aloc[0] = *A_loc;
527132fba14fSHong Zhang   }
52729566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5273109e0772SStefano Zampini   if (!col) { /* attach global id of condensed columns */
52749566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5275109e0772SStefano Zampini   }
527632fba14fSHong Zhang   *A_loc = aloc[0];
52779566063dSJacob Faibussowitsch   PetscCall(PetscFree(aloc));
527832fba14fSHong Zhang   if (!row) {
52799566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrowa));
528032fba14fSHong Zhang   }
528132fba14fSHong Zhang   if (!col) {
52829566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscola));
528332fba14fSHong Zhang   }
52849566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
528532fba14fSHong Zhang   PetscFunctionReturn(0);
528632fba14fSHong Zhang }
528732fba14fSHong Zhang 
52885c65b9ecSFande Kong /*
52895c65b9ecSFande Kong  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
52905c65b9ecSFande Kong  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
52915c65b9ecSFande Kong  * on a global size.
52925c65b9ecSFande Kong  * */
52935c65b9ecSFande Kong PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
52945c65b9ecSFande Kong {
52955c65b9ecSFande Kong   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
52965c65b9ecSFande Kong   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5297131c27b5Sprj-   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5298131c27b5Sprj-   PetscMPIInt              owner;
52995c65b9ecSFande Kong   PetscSFNode              *iremote,*oiremote;
53005c65b9ecSFande Kong   const PetscInt           *lrowindices;
53015c65b9ecSFande Kong   PetscSF                  sf,osf;
53025c65b9ecSFande Kong   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
53035c65b9ecSFande Kong   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
53045c65b9ecSFande Kong   MPI_Comm                 comm;
53055c65b9ecSFande Kong   ISLocalToGlobalMapping   mapping;
5306fff043a9SJunchao Zhang   const PetscScalar        *pd_a,*po_a;
53075c65b9ecSFande Kong 
53085c65b9ecSFande Kong   PetscFunctionBegin;
53099566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
53105c65b9ecSFande Kong   /* plocalsize is the number of roots
53115c65b9ecSFande Kong    * nrows is the number of leaves
53125c65b9ecSFande Kong    * */
53139566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
53149566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(rows,&nrows));
53159566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows,&iremote));
53169566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rows,&lrowindices));
53175c65b9ecSFande Kong   for (i=0;i<nrows;i++) {
53185c65b9ecSFande Kong     /* Find a remote index and an owner for a row
53195c65b9ecSFande Kong      * The row could be local or remote
53205c65b9ecSFande Kong      * */
532134bcad68SFande Kong     owner = 0;
532234bcad68SFande Kong     lidx  = 0;
53239566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
53245c65b9ecSFande Kong     iremote[i].index = lidx;
53255c65b9ecSFande Kong     iremote[i].rank  = owner;
53265c65b9ecSFande Kong   }
53275c65b9ecSFande Kong   /* Create SF to communicate how many nonzero columns for each row */
53289566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm,&sf));
53295c65b9ecSFande Kong   /* SF will figure out the number of nonzero colunms for each row, and their
53305c65b9ecSFande Kong    * offsets
53315c65b9ecSFande Kong    * */
53329566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
53339566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
53349566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
5335bc8e477aSFande Kong 
53369566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
53379566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
53389566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows,&pnnz));
53395c65b9ecSFande Kong   roffsets[0] = 0;
53405c65b9ecSFande Kong   roffsets[1] = 0;
53415c65b9ecSFande Kong   for (i=0;i<plocalsize;i++) {
53425c65b9ecSFande Kong     /* diag */
53435c65b9ecSFande Kong     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
53445c65b9ecSFande Kong     /* off diag */
53455c65b9ecSFande Kong     nrcols[i*2+1] = po->i[i+1] - po->i[i];
53465c65b9ecSFande Kong     /* compute offsets so that we relative location for each row */
53475c65b9ecSFande Kong     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
53485c65b9ecSFande Kong     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
53495c65b9ecSFande Kong   }
53509566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2*nrows,&nlcols));
53519566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2*nrows,&loffsets));
53525c65b9ecSFande Kong   /* 'r' means root, and 'l' means leaf */
53539566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
53549566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
53559566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
53569566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
53579566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
53589566063dSJacob Faibussowitsch   PetscCall(PetscFree(roffsets));
53599566063dSJacob Faibussowitsch   PetscCall(PetscFree(nrcols));
53605c65b9ecSFande Kong   dntotalcols = 0;
53615c65b9ecSFande Kong   ontotalcols = 0;
5362bc8e477aSFande Kong   ncol = 0;
53635c65b9ecSFande Kong   for (i=0;i<nrows;i++) {
53645c65b9ecSFande Kong     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5365bc8e477aSFande Kong     ncol = PetscMax(pnnz[i],ncol);
53665c65b9ecSFande Kong     /* diag */
53675c65b9ecSFande Kong     dntotalcols += nlcols[i*2+0];
53685c65b9ecSFande Kong     /* off diag */
53695c65b9ecSFande Kong     ontotalcols += nlcols[i*2+1];
53705c65b9ecSFande Kong   }
53715c65b9ecSFande Kong   /* We do not need to figure the right number of columns
53725c65b9ecSFande Kong    * since all the calculations will be done by going through the raw data
53735c65b9ecSFande Kong    * */
53749566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
53759566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*P_oth));
53769566063dSJacob Faibussowitsch   PetscCall(PetscFree(pnnz));
53775c65b9ecSFande Kong   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
53785c65b9ecSFande Kong   /* diag */
53799566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols,&iremote));
53805c65b9ecSFande Kong   /* off diag */
53819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
53825c65b9ecSFande Kong   /* diag */
53839566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
53845c65b9ecSFande Kong   /* off diag */
53859566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
53865c65b9ecSFande Kong   dntotalcols = 0;
53875c65b9ecSFande Kong   ontotalcols = 0;
53885c65b9ecSFande Kong   ntotalcols  = 0;
53895c65b9ecSFande Kong   for (i=0;i<nrows;i++) {
539034bcad68SFande Kong     owner = 0;
53919566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
53925c65b9ecSFande Kong     /* Set iremote for diag matrix */
53935c65b9ecSFande Kong     for (j=0;j<nlcols[i*2+0];j++) {
53945c65b9ecSFande Kong       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
53955c65b9ecSFande Kong       iremote[dntotalcols].rank    = owner;
53965c65b9ecSFande Kong       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
53975c65b9ecSFande Kong       ilocal[dntotalcols++]        = ntotalcols++;
53985c65b9ecSFande Kong     }
53995c65b9ecSFande Kong     /* off diag */
54005c65b9ecSFande Kong     for (j=0;j<nlcols[i*2+1];j++) {
54015c65b9ecSFande Kong       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
54025c65b9ecSFande Kong       oiremote[ontotalcols].rank    = owner;
54035c65b9ecSFande Kong       oilocal[ontotalcols++]        = ntotalcols++;
54045c65b9ecSFande Kong     }
54055c65b9ecSFande Kong   }
54069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rows,&lrowindices));
54079566063dSJacob Faibussowitsch   PetscCall(PetscFree(loffsets));
54089566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlcols));
54099566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm,&sf));
54105c65b9ecSFande Kong   /* P serves as roots and P_oth is leaves
54115c65b9ecSFande Kong    * Diag matrix
54125c65b9ecSFande Kong    * */
54139566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
54149566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
54159566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
54165c65b9ecSFande Kong 
54179566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm,&osf));
54185c65b9ecSFande Kong   /* Off diag */
54199566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
54209566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(osf));
54219566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(osf));
54229566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
54239566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
54245c65b9ecSFande Kong   /* We operate on the matrix internal data for saving memory */
54259566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
54269566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
54279566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
54285c65b9ecSFande Kong   /* Convert to global indices for diag matrix */
54295c65b9ecSFande Kong   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
54309566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
54315c65b9ecSFande Kong   /* We want P_oth store global indices */
54329566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
54335c65b9ecSFande Kong   /* Use memory scalable approach */
54349566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
54359566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
54369566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
54379566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
54385c65b9ecSFande Kong   /* Convert back to local indices */
54395c65b9ecSFande Kong   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
54409566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
54415c65b9ecSFande Kong   nout = 0;
54429566063dSJacob Faibussowitsch   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
544308401ef6SPierre Jolivet   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
54449566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
54455c65b9ecSFande Kong   /* Exchange values */
54469566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
54479566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
54489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
54499566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
54505c65b9ecSFande Kong   /* Stop PETSc from shrinking memory */
54515c65b9ecSFande Kong   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
54529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
54539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
54545c65b9ecSFande Kong   /* Attach PetscSF objects to P_oth so that we can reuse it later */
54559566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
54569566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
54579566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
54589566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&osf));
54595c65b9ecSFande Kong   PetscFunctionReturn(0);
54605c65b9ecSFande Kong }
54615c65b9ecSFande Kong 
54625c65b9ecSFande Kong /*
54635c65b9ecSFande Kong  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
54645c65b9ecSFande Kong  * This supports MPIAIJ and MAIJ
54655c65b9ecSFande Kong  * */
5466bc8e477aSFande Kong PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
54675c65b9ecSFande Kong {
54685c65b9ecSFande Kong   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5469bc8e477aSFande Kong   Mat_SeqAIJ            *p_oth;
5470bc8e477aSFande Kong   IS                    rows,map;
5471bc8e477aSFande Kong   PetscHMapI            hamp;
5472bc8e477aSFande Kong   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
54735c65b9ecSFande Kong   MPI_Comm              comm;
54745c65b9ecSFande Kong   PetscSF               sf,osf;
5475bc8e477aSFande Kong   PetscBool             has;
54765c65b9ecSFande Kong 
54775c65b9ecSFande Kong   PetscFunctionBegin;
54789566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
54799566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
54805c65b9ecSFande Kong   /* If it is the first time, create an index set of off-diag nonzero columns of A,
54815c65b9ecSFande Kong    *  and then create a submatrix (that often is an overlapping matrix)
54825c65b9ecSFande Kong    * */
54835c65b9ecSFande Kong   if (reuse == MAT_INITIAL_MATRIX) {
54845c65b9ecSFande Kong     /* Use a hash table to figure out unique keys */
54859566063dSJacob Faibussowitsch     PetscCall(PetscHMapICreate(&hamp));
54869566063dSJacob Faibussowitsch     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
54879566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5488bc8e477aSFande Kong     count = 0;
5489bc8e477aSFande Kong     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5490bc8e477aSFande Kong     for (i=0;i<a->B->cmap->n;i++) {
5491bc8e477aSFande Kong       key  = a->garray[i]/dof;
54929566063dSJacob Faibussowitsch       PetscCall(PetscHMapIHas(hamp,key,&has));
5493bc8e477aSFande Kong       if (!has) {
5494bc8e477aSFande Kong         mapping[i] = count;
54959566063dSJacob Faibussowitsch         PetscCall(PetscHMapISet(hamp,key,count++));
5496bc8e477aSFande Kong       } else {
5497bc8e477aSFande Kong         /* Current 'i' has the same value the previous step */
5498bc8e477aSFande Kong         mapping[i] = count-1;
54995c65b9ecSFande Kong       }
5500bc8e477aSFande Kong     }
55019566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
55029566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetSize(hamp,&htsize));
550308401ef6SPierre Jolivet     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
55049566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(htsize,&rowindices));
55055c65b9ecSFande Kong     off = 0;
55069566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
55079566063dSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&hamp));
55089566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(htsize,rowindices));
55099566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
55105c65b9ecSFande Kong     /* In case, the matrix was already created but users want to recreate the matrix */
55119566063dSJacob Faibussowitsch     PetscCall(MatDestroy(P_oth));
55129566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
55139566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
55149566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&map));
55159566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&rows));
55165c65b9ecSFande Kong   } else if (reuse == MAT_REUSE_MATRIX) {
55175c65b9ecSFande Kong     /* If matrix was already created, we simply update values using SF objects
55185c65b9ecSFande Kong      * that as attached to the matrix ealier.
5519fff043a9SJunchao Zhang      */
5520fff043a9SJunchao Zhang     const PetscScalar *pd_a,*po_a;
5521fff043a9SJunchao Zhang 
55229566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
55239566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
552408401ef6SPierre Jolivet     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
55255c65b9ecSFande Kong     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
55265c65b9ecSFande Kong     /* Update values in place */
55279566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
55289566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
55299566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
55309566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
55319566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
55329566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
55339566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
55349566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
55356718818eSStefano Zampini   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
55369566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
55375c65b9ecSFande Kong   PetscFunctionReturn(0);
55385c65b9ecSFande Kong }
55395c65b9ecSFande Kong 
554025616d81SHong Zhang /*@C
554132fba14fSHong Zhang   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
554225616d81SHong Zhang 
554325616d81SHong Zhang   Collective on Mat
554425616d81SHong Zhang 
554525616d81SHong Zhang   Input Parameters:
55466b867d5aSJose E. Roman + A - the first matrix in mpiaij format
55476b867d5aSJose E. Roman . B - the second matrix in mpiaij format
55486b867d5aSJose E. Roman - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
554925616d81SHong Zhang 
5550f1a722f8SMatthew G. Knepley   Output Parameters:
5551f1a722f8SMatthew G. Knepley + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5552f1a722f8SMatthew G. Knepley . colb - On input index sets of columns of B to extract (or NULL), modified on output
5553f1a722f8SMatthew G. Knepley - B_seq - the sequential matrix generated
555425616d81SHong Zhang 
555525616d81SHong Zhang   Level: developer
555625616d81SHong Zhang 
555725616d81SHong Zhang @*/
555866bfb163SHong Zhang PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
555925616d81SHong Zhang {
5560899cda47SBarry Smith   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5561b1d57f15SBarry Smith   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
556225616d81SHong Zhang   IS             isrowb,iscolb;
55630298fd71SBarry Smith   Mat            *bseq=NULL;
556425616d81SHong Zhang 
556525616d81SHong Zhang   PetscFunctionBegin;
5566d0f46423SBarry Smith   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
556798921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
556825616d81SHong Zhang   }
55699566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
557025616d81SHong Zhang 
557125616d81SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5572d0f46423SBarry Smith     start = A->cmap->rstart;
557325616d81SHong Zhang     cmap  = a->garray;
5574d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5575d0f46423SBarry Smith     nzB   = a->B->cmap->n;
55769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA+nzB, &idx));
557725616d81SHong Zhang     ncols = 0;
55780390132cSHong Zhang     for (i=0; i<nzB; i++) {  /* row < local row index */
557925616d81SHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
558025616d81SHong Zhang       else break;
558125616d81SHong Zhang     }
558225616d81SHong Zhang     imark = i;
55830390132cSHong Zhang     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
55840390132cSHong Zhang     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
55859566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
55869566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
558725616d81SHong Zhang   } else {
558808401ef6SPierre Jolivet     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
558925616d81SHong Zhang     isrowb  = *rowb; iscolb = *colb;
55909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1,&bseq));
559125616d81SHong Zhang     bseq[0] = *B_seq;
559225616d81SHong Zhang   }
55939566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
559425616d81SHong Zhang   *B_seq = bseq[0];
55959566063dSJacob Faibussowitsch   PetscCall(PetscFree(bseq));
559625616d81SHong Zhang   if (!rowb) {
55979566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrowb));
559825616d81SHong Zhang   } else {
559925616d81SHong Zhang     *rowb = isrowb;
560025616d81SHong Zhang   }
560125616d81SHong Zhang   if (!colb) {
56029566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscolb));
560325616d81SHong Zhang   } else {
560425616d81SHong Zhang     *colb = iscolb;
560525616d81SHong Zhang   }
56069566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
560725616d81SHong Zhang   PetscFunctionReturn(0);
560825616d81SHong Zhang }
5609429d309bSHong Zhang 
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.

    Level: developer

*/
5632b7f45c76SHong Zhang PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5633429d309bSHong Zhang {
5634899cda47SBarry Smith   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
563587025532SHong Zhang   Mat_SeqAIJ             *b_oth;
56364b8d542aSHong Zhang   VecScatter             ctx;
5637ce94432eSBarry Smith   MPI_Comm               comm;
56383515ee7fSJunchao Zhang   const PetscMPIInt      *rprocs,*sprocs;
56393515ee7fSJunchao Zhang   const PetscInt         *srow,*rstarts,*sstarts;
5640277f51e8SBarry Smith   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5641f4259b30SLisandro Dalcin   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5642277f51e8SBarry Smith   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5643ddea5d60SJunchao Zhang   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5644ddea5d60SJunchao Zhang   PetscMPIInt            size,tag,rank,nreqs;
5645429d309bSHong Zhang 
5646429d309bSHong Zhang   PetscFunctionBegin;
56479566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
56489566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
5649a7c7454dSHong Zhang 
5650c0aa6a63SJacob Faibussowitsch   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
565198921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5652429d309bSHong Zhang   }
56539566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
56549566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5655a6b2eed2SHong Zhang 
5656ec07b8f8SHong Zhang   if (size == 1) {
5657ec07b8f8SHong Zhang     startsj_s = NULL;
5658ec07b8f8SHong Zhang     bufa_ptr  = NULL;
565952f7967eSHong Zhang     *B_oth    = NULL;
5660ec07b8f8SHong Zhang     PetscFunctionReturn(0);
5661ec07b8f8SHong Zhang   }
5662ec07b8f8SHong Zhang 
5663fa83eaafSHong Zhang   ctx = a->Mvctx;
56644b8d542aSHong Zhang   tag = ((PetscObject)ctx)->tag;
56654b8d542aSHong Zhang 
56669566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
56673515ee7fSJunchao Zhang   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
56689566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
56699566063dSJacob Faibussowitsch   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
56709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nreqs,&reqs));
5671ddea5d60SJunchao Zhang   rwaits = reqs;
5672ddea5d60SJunchao Zhang   swaits = reqs + nrecvs;
5673429d309bSHong Zhang 
5674b7f45c76SHong Zhang   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5675429d309bSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5676a6b2eed2SHong Zhang     /* i-array */
5677a6b2eed2SHong Zhang     /*---------*/
5678a6b2eed2SHong Zhang     /*  post receives */
56799566063dSJacob Faibussowitsch     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5680a6b2eed2SHong Zhang     for (i=0; i<nrecvs; i++) {
568174268593SBarry Smith       rowlen = rvalues + rstarts[i]*rbs;
5682e42f35eeSHong Zhang       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
56839566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5684429d309bSHong Zhang     }
5685a6b2eed2SHong Zhang 
5686a6b2eed2SHong Zhang     /* pack the outgoing message */
56879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
56882205254eSKarl Rupp 
56892205254eSKarl Rupp     sstartsj[0] = 0;
56902205254eSKarl Rupp     rstartsj[0] = 0;
5691a6b2eed2SHong Zhang     len         = 0; /* total length of j or a array to be sent */
56923515ee7fSJunchao Zhang     if (nsends) {
56933515ee7fSJunchao Zhang       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
56949566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
56953515ee7fSJunchao Zhang     }
5696a6b2eed2SHong Zhang     for (i=0; i<nsends; i++) {
56973515ee7fSJunchao Zhang       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5698e42f35eeSHong Zhang       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
569987025532SHong Zhang       for (j=0; j<nrows; j++) {
5700d0f46423SBarry Smith         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5701e42f35eeSHong Zhang         for (l=0; l<sbs; l++) {
57029566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
57032205254eSKarl Rupp 
5704e42f35eeSHong Zhang           rowlen[j*sbs+l] = ncols;
57052205254eSKarl Rupp 
5706e42f35eeSHong Zhang           len += ncols;
57079566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5708e42f35eeSHong Zhang         }
5709a6b2eed2SHong Zhang         k++;
5710429d309bSHong Zhang       }
57119566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
57122205254eSKarl Rupp 
5713dea91ad1SHong Zhang       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5714429d309bSHong Zhang     }
571587025532SHong Zhang     /* recvs and sends of i-array are completed */
57169566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
57179566063dSJacob Faibussowitsch     PetscCall(PetscFree(svalues));
5718e42f35eeSHong Zhang 
5719a6b2eed2SHong Zhang     /* allocate buffers for sending j and a arrays */
57209566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len+1,&bufj));
57219566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len+1,&bufa));
5722a6b2eed2SHong Zhang 
572387025532SHong Zhang     /* create i-array of B_oth */
57249566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(aBn+2,&b_othi));
57252205254eSKarl Rupp 
572687025532SHong Zhang     b_othi[0] = 0;
5727a6b2eed2SHong Zhang     len       = 0; /* total length of j or a array to be received */
5728a6b2eed2SHong Zhang     k         = 0;
5729a6b2eed2SHong Zhang     for (i=0; i<nrecvs; i++) {
57303515ee7fSJunchao Zhang       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
57313515ee7fSJunchao Zhang       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
573287025532SHong Zhang       for (j=0; j<nrows; j++) {
573387025532SHong Zhang         b_othi[k+1] = b_othi[k] + rowlen[j];
57349566063dSJacob Faibussowitsch         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5735f91af8c7SBarry Smith         k++;
5736a6b2eed2SHong Zhang       }
5737dea91ad1SHong Zhang       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5738a6b2eed2SHong Zhang     }
57399566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvalues));
5740a6b2eed2SHong Zhang 
574187025532SHong Zhang     /* allocate space for j and a arrrays of B_oth */
57429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
57439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5744a6b2eed2SHong Zhang 
574587025532SHong Zhang     /* j-array */
574687025532SHong Zhang     /*---------*/
5747a6b2eed2SHong Zhang     /*  post receives of j-array */
5748a6b2eed2SHong Zhang     for (i=0; i<nrecvs; i++) {
574987025532SHong Zhang       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
57509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5751a6b2eed2SHong Zhang     }
5752e42f35eeSHong Zhang 
5753e42f35eeSHong Zhang     /* pack the outgoing message j-array */
57543515ee7fSJunchao Zhang     if (nsends) k = sstarts[0];
5755a6b2eed2SHong Zhang     for (i=0; i<nsends; i++) {
5756e42f35eeSHong Zhang       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5757a6b2eed2SHong Zhang       bufJ  = bufj+sstartsj[i];
575887025532SHong Zhang       for (j=0; j<nrows; j++) {
5759d0f46423SBarry Smith         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5760e42f35eeSHong Zhang         for (ll=0; ll<sbs; ll++) {
57619566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5762a6b2eed2SHong Zhang           for (l=0; l<ncols; l++) {
5763a6b2eed2SHong Zhang             *bufJ++ = cols[l];
576487025532SHong Zhang           }
57659566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5766e42f35eeSHong Zhang         }
576787025532SHong Zhang       }
57689566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
576987025532SHong Zhang     }
577087025532SHong Zhang 
577187025532SHong Zhang     /* recvs and sends of j-array are completed */
57729566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
577387025532SHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
5774b7f45c76SHong Zhang     sstartsj = *startsj_s;
57751d79065fSBarry Smith     rstartsj = *startsj_r;
577687025532SHong Zhang     bufa     = *bufa_ptr;
577787025532SHong Zhang     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
57789566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5779ddea5d60SJunchao Zhang   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
578087025532SHong Zhang 
578187025532SHong Zhang   /* a-array */
578287025532SHong Zhang   /*---------*/
578387025532SHong Zhang   /*  post receives of a-array */
578487025532SHong Zhang   for (i=0; i<nrecvs; i++) {
578587025532SHong Zhang     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
57869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
578787025532SHong Zhang   }
5788e42f35eeSHong Zhang 
5789e42f35eeSHong Zhang   /* pack the outgoing message a-array */
57903515ee7fSJunchao Zhang   if (nsends) k = sstarts[0];
579187025532SHong Zhang   for (i=0; i<nsends; i++) {
5792e42f35eeSHong Zhang     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
579387025532SHong Zhang     bufA  = bufa+sstartsj[i];
579487025532SHong Zhang     for (j=0; j<nrows; j++) {
5795d0f46423SBarry Smith       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5796e42f35eeSHong Zhang       for (ll=0; ll<sbs; ll++) {
57979566063dSJacob Faibussowitsch         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
579887025532SHong Zhang         for (l=0; l<ncols; l++) {
5799a6b2eed2SHong Zhang           *bufA++ = vals[l];
5800a6b2eed2SHong Zhang         }
58019566063dSJacob Faibussowitsch         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5802e42f35eeSHong Zhang       }
5803a6b2eed2SHong Zhang     }
58049566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5805a6b2eed2SHong Zhang   }
580687025532SHong Zhang   /* recvs and sends of a-array are completed */
58079566063dSJacob Faibussowitsch   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
58089566063dSJacob Faibussowitsch   PetscCall(PetscFree(reqs));
5809a6b2eed2SHong Zhang 
581087025532SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5811a6b2eed2SHong Zhang     /* put together the new matrix */
58129566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5813a6b2eed2SHong Zhang 
5814a6b2eed2SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5815a6b2eed2SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
581687025532SHong Zhang     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5817e6b907acSBarry Smith     b_oth->free_a  = PETSC_TRUE;
5818e6b907acSBarry Smith     b_oth->free_ij = PETSC_TRUE;
581987025532SHong Zhang     b_oth->nonew   = 0;
5820a6b2eed2SHong Zhang 
58219566063dSJacob Faibussowitsch     PetscCall(PetscFree(bufj));
5822b7f45c76SHong Zhang     if (!startsj_s || !bufa_ptr) {
58239566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sstartsj,rstartsj));
58249566063dSJacob Faibussowitsch       PetscCall(PetscFree(bufa_ptr));
5825dea91ad1SHong Zhang     } else {
5826b7f45c76SHong Zhang       *startsj_s = sstartsj;
58271d79065fSBarry Smith       *startsj_r = rstartsj;
582887025532SHong Zhang       *bufa_ptr  = bufa;
582987025532SHong Zhang     }
5830fff043a9SJunchao Zhang   } else if (scall == MAT_REUSE_MATRIX) {
58319566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5832dea91ad1SHong Zhang   }
58333515ee7fSJunchao Zhang 
58349566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
58359566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
58369566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5837429d309bSHong Zhang   PetscFunctionReturn(0);
5838429d309bSHong Zhang }
5839ccd8e176SBarry Smith 
/*
   Prototypes of the MatConvert_*() routines and of MatProductSetFromOptions_IS_XAIJ(); none of them is defined
   in this part of the file. A prototype placed between #if defined(PETSC_HAVE_xxx) and #endif is declared only
   when that macro is defined.
*/
5840cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5841cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5842ca9cdca7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
58439779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
5844a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5845191b95cbSRichard Tran Mills #endif
5846ae8d29abSPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5847cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
58485d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5849cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
58505d7652ecSHong Zhang #endif
5851d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
5852d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5853d24d4204SJose E. Roman #endif
585463c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE)
585563c07aadSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
585663c07aadSStefano Zampini #endif
58573338378cSStefano Zampini #if defined(PETSC_HAVE_CUDA)
58583338378cSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
58593338378cSStefano Zampini #endif
58603d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
58613d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
58623d0639e7SStefano Zampini #endif
5863d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
58644222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
58654222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
586617667f90SBarry Smith 
5867fc4dec0aSBarry Smith /*
5868fc4dec0aSBarry Smith     Computes (B'*A')' since computing B*A directly is untenable
5869fc4dec0aSBarry Smith 
5870fc4dec0aSBarry Smith                n                       p                          p
58712da392ccSBarry Smith         [             ]       [             ]         [                 ]
58722da392ccSBarry Smith       m [      A      ]  *  n [       B     ]   =   m [         C       ]
58732da392ccSBarry Smith         [             ]       [             ]         [                 ]
5874fc4dec0aSBarry Smith 
5875fc4dec0aSBarry Smith */
58766718818eSStefano Zampini static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5877fc4dec0aSBarry Smith {
5878fc4dec0aSBarry Smith   Mat            At,Bt,Ct;
5879fc4dec0aSBarry Smith 
5880fc4dec0aSBarry Smith   PetscFunctionBegin;
58819566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
58829566063dSJacob Faibussowitsch   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
58839566063dSJacob Faibussowitsch   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
58849566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&At));
58859566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Bt));
58869566063dSJacob Faibussowitsch   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
58879566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
5888fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5889fc4dec0aSBarry Smith }
5890fc4dec0aSBarry Smith 
58916718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5892fc4dec0aSBarry Smith {
58936718818eSStefano Zampini   PetscBool      cisdense;
5894fc4dec0aSBarry Smith 
5895fc4dec0aSBarry Smith   PetscFunctionBegin;
589608401ef6SPierre Jolivet   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
58979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
58989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
58999566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
59006718818eSStefano Zampini   if (!cisdense) {
59019566063dSJacob Faibussowitsch     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
59026718818eSStefano Zampini   }
59039566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
5904f75ecaa4SHong Zhang 
59054222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5906fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5907fc4dec0aSBarry Smith }
5908fc4dec0aSBarry Smith 
5909fc4dec0aSBarry Smith /* ----------------------------------------------------------------*/
59104222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5911fc4dec0aSBarry Smith {
59124222ddf1SHong Zhang   Mat_Product *product = C->product;
59134222ddf1SHong Zhang   Mat         A = product->A,B=product->B;
5914fc4dec0aSBarry Smith 
5915fc4dec0aSBarry Smith   PetscFunctionBegin;
59164222ddf1SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
591798921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
59184222ddf1SHong Zhang 
59194222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
59204222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
5921fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5922fc4dec0aSBarry Smith }
5923fc4dec0aSBarry Smith 
59244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
59254222ddf1SHong Zhang {
59264222ddf1SHong Zhang   Mat_Product    *product = C->product;
59274222ddf1SHong Zhang 
59284222ddf1SHong Zhang   PetscFunctionBegin;
59294222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
59309566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
59316718818eSStefano Zampini   }
59324222ddf1SHong Zhang   PetscFunctionReturn(0);
59334222ddf1SHong Zhang }
5934394ed5ebSJunchao Zhang 
5935394ed5ebSJunchao Zhang /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5936394ed5ebSJunchao Zhang    is greater than value, or last if there is no such element.
5937394ed5ebSJunchao Zhang */
5938394ed5ebSJunchao Zhang static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5939394ed5ebSJunchao Zhang {
5940394ed5ebSJunchao Zhang   PetscCount  it,step,count = last - first;
5941394ed5ebSJunchao Zhang 
5942394ed5ebSJunchao Zhang   PetscFunctionBegin;
5943394ed5ebSJunchao Zhang   while (count > 0) {
5944394ed5ebSJunchao Zhang     it   = first;
5945394ed5ebSJunchao Zhang     step = count / 2;
5946394ed5ebSJunchao Zhang     it  += step;
5947394ed5ebSJunchao Zhang     if (!(value < array[it])) {
5948394ed5ebSJunchao Zhang       first  = ++it;
5949394ed5ebSJunchao Zhang       count -= step + 1;
5950394ed5ebSJunchao Zhang     } else count = step;
5951394ed5ebSJunchao Zhang   }
5952394ed5ebSJunchao Zhang   *upper = first;
5953394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
5954394ed5ebSJunchao Zhang }
5955394ed5ebSJunchao Zhang 
5956158ec288SJunchao Zhang /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5957394ed5ebSJunchao Zhang 
5958394ed5ebSJunchao Zhang   Input Parameters:
5959394ed5ebSJunchao Zhang 
5960394ed5ebSJunchao Zhang     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5961394ed5ebSJunchao Zhang     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5962394ed5ebSJunchao Zhang 
5963158ec288SJunchao Zhang     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5964394ed5ebSJunchao Zhang 
5965394ed5ebSJunchao Zhang     For Set1, j1[] contains column indices of the nonzeros.
5966394ed5ebSJunchao Zhang     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5967394ed5ebSJunchao Zhang     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
5968394ed5ebSJunchao Zhang     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5969394ed5ebSJunchao Zhang 
5970394ed5ebSJunchao Zhang     Similar for Set2.
5971394ed5ebSJunchao Zhang 
5972394ed5ebSJunchao Zhang     This routine merges the two sets of nonzeros row by row and removes repeats.
5973394ed5ebSJunchao Zhang 
5974158ec288SJunchao Zhang   Output Parameters: (memory is allocated by the caller)
5975394ed5ebSJunchao Zhang 
5976394ed5ebSJunchao Zhang     i[],j[]: the CSR of the merged matrix, which has m rows.
5977394ed5ebSJunchao Zhang     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5978394ed5ebSJunchao Zhang     imap2[]: similar to imap1[], but for Set2.
5979394ed5ebSJunchao Zhang     Note we order nonzeros row-by-row and from left to right.
5980394ed5ebSJunchao Zhang */
5981394ed5ebSJunchao Zhang static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
5982394ed5ebSJunchao Zhang   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
5983394ed5ebSJunchao Zhang   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
5984394ed5ebSJunchao Zhang {
5985394ed5ebSJunchao Zhang   PetscInt       r,m; /* Row index of mat */
5986394ed5ebSJunchao Zhang   PetscCount     t,t1,t2,b1,e1,b2,e2;
5987394ed5ebSJunchao Zhang 
5988394ed5ebSJunchao Zhang   PetscFunctionBegin;
59899566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat,&m,NULL));
5990394ed5ebSJunchao Zhang   t1   = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
5991394ed5ebSJunchao Zhang   i[0] = 0;
5992394ed5ebSJunchao Zhang   for (r=0; r<m; r++) { /* Do row by row merging */
5993394ed5ebSJunchao Zhang     b1   = rowBegin1[r];
5994394ed5ebSJunchao Zhang     e1   = rowEnd1[r];
5995394ed5ebSJunchao Zhang     b2   = rowBegin2[r];
5996394ed5ebSJunchao Zhang     e2   = rowEnd2[r];
5997394ed5ebSJunchao Zhang     while (b1 < e1 && b2 < e2) {
5998394ed5ebSJunchao Zhang       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
5999394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6000394ed5ebSJunchao Zhang         imap1[t1] = t;
6001394ed5ebSJunchao Zhang         imap2[t2] = t;
6002394ed5ebSJunchao Zhang         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6003394ed5ebSJunchao Zhang         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6004394ed5ebSJunchao Zhang         t1++; t2++; t++;
6005394ed5ebSJunchao Zhang       } else if (j1[b1] < j2[b2]) {
6006394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6007394ed5ebSJunchao Zhang         imap1[t1] = t;
6008394ed5ebSJunchao Zhang         b1       += jmap1[t1+1] - jmap1[t1];
6009394ed5ebSJunchao Zhang         t1++; t++;
6010394ed5ebSJunchao Zhang       } else {
6011394ed5ebSJunchao Zhang         j[t]      = j2[b2];
6012394ed5ebSJunchao Zhang         imap2[t2] = t;
6013394ed5ebSJunchao Zhang         b2       += jmap2[t2+1] - jmap2[t2];
6014394ed5ebSJunchao Zhang         t2++; t++;
6015394ed5ebSJunchao Zhang       }
6016394ed5ebSJunchao Zhang     }
6017394ed5ebSJunchao Zhang     /* Merge the remaining in either j1[] or j2[] */
6018394ed5ebSJunchao Zhang     while (b1 < e1) {
6019394ed5ebSJunchao Zhang       j[t]      = j1[b1];
6020394ed5ebSJunchao Zhang       imap1[t1] = t;
6021394ed5ebSJunchao Zhang       b1       += jmap1[t1+1] - jmap1[t1];
6022394ed5ebSJunchao Zhang       t1++; t++;
6023394ed5ebSJunchao Zhang     }
6024394ed5ebSJunchao Zhang     while (b2 < e2) {
6025394ed5ebSJunchao Zhang       j[t]      = j2[b2];
6026394ed5ebSJunchao Zhang       imap2[t2] = t;
6027394ed5ebSJunchao Zhang       b2       += jmap2[t2+1] - jmap2[t2];
6028394ed5ebSJunchao Zhang       t2++; t++;
6029394ed5ebSJunchao Zhang     }
6030394ed5ebSJunchao Zhang     i[r+1] = t;
6031394ed5ebSJunchao Zhang   }
6032394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6033394ed5ebSJunchao Zhang }
6034394ed5ebSJunchao Zhang 
6035158ec288SJunchao Zhang /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6036394ed5ebSJunchao Zhang 
6037394ed5ebSJunchao Zhang   Input Parameters:
6038394ed5ebSJunchao Zhang     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6039394ed5ebSJunchao Zhang     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6040394ed5ebSJunchao Zhang       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6041394ed5ebSJunchao Zhang 
6042394ed5ebSJunchao Zhang       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6043394ed5ebSJunchao Zhang       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6044394ed5ebSJunchao Zhang 
6045394ed5ebSJunchao Zhang   Output Parameters:
6046394ed5ebSJunchao Zhang     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6047394ed5ebSJunchao Zhang     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6048394ed5ebSJunchao Zhang       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6049394ed5ebSJunchao Zhang       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6050394ed5ebSJunchao Zhang 
6051394ed5ebSJunchao Zhang     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6052158ec288SJunchao Zhang       Atot: number of entries belonging to the diagonal block.
6053158ec288SJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6054394ed5ebSJunchao Zhang       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6055394ed5ebSJunchao Zhang         repeats (i.e., same 'i,j' pair).
6056394ed5ebSJunchao Zhang       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6057394ed5ebSJunchao Zhang         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6058394ed5ebSJunchao Zhang 
6059394ed5ebSJunchao Zhang       Atot: number of entries belonging to the diagonal block
6060394ed5ebSJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6061394ed5ebSJunchao Zhang 
6062394ed5ebSJunchao Zhang     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6063394ed5ebSJunchao Zhang 
6064158ec288SJunchao Zhang     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6065394ed5ebSJunchao Zhang */
6066394ed5ebSJunchao Zhang static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6067394ed5ebSJunchao Zhang   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6068394ed5ebSJunchao Zhang   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6069394ed5ebSJunchao Zhang   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6070394ed5ebSJunchao Zhang {
6071394ed5ebSJunchao Zhang   PetscInt          cstart,cend,rstart,rend,row,col;
6072394ed5ebSJunchao Zhang   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6073394ed5ebSJunchao Zhang   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6074394ed5ebSJunchao Zhang   PetscCount        k,m,p,q,r,s,mid;
6075394ed5ebSJunchao Zhang   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6076394ed5ebSJunchao Zhang 
6077394ed5ebSJunchao Zhang   PetscFunctionBegin;
60789566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
60799566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6080394ed5ebSJunchao Zhang   m    = rend - rstart;
6081394ed5ebSJunchao Zhang 
6082394ed5ebSJunchao Zhang   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6083394ed5ebSJunchao Zhang 
6084394ed5ebSJunchao Zhang   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6085394ed5ebSJunchao Zhang      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6086394ed5ebSJunchao Zhang   */
6087394ed5ebSJunchao Zhang   while (k<n) {
6088394ed5ebSJunchao Zhang     row = i[k];
6089394ed5ebSJunchao Zhang     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6090394ed5ebSJunchao Zhang     for (s=k; s<n; s++) if (i[s] != row) break;
6091394ed5ebSJunchao Zhang     for (p=k; p<s; p++) {
6092394ed5ebSJunchao Zhang       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
609354c59aa7SJacob Faibussowitsch       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6094394ed5ebSJunchao Zhang     }
60959566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6096158ec288SJunchao Zhang     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6097394ed5ebSJunchao Zhang     rowBegin[row-rstart] = k;
6098394ed5ebSJunchao Zhang     rowMid[row-rstart]   = mid;
6099394ed5ebSJunchao Zhang     rowEnd[row-rstart]   = s;
6100394ed5ebSJunchao Zhang 
6101394ed5ebSJunchao Zhang     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6102394ed5ebSJunchao Zhang     Atot += mid - k;
6103394ed5ebSJunchao Zhang     Btot += s - mid;
6104394ed5ebSJunchao Zhang 
6105394ed5ebSJunchao Zhang     /* Count unique nonzeros of this diag/offdiag row */
6106394ed5ebSJunchao Zhang     for (p=k; p<mid;) {
6107394ed5ebSJunchao Zhang       col = j[p];
6108394ed5ebSJunchao Zhang       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6109394ed5ebSJunchao Zhang       Annz++;
6110394ed5ebSJunchao Zhang     }
6111394ed5ebSJunchao Zhang 
6112394ed5ebSJunchao Zhang     for (p=mid; p<s;) {
6113394ed5ebSJunchao Zhang       col = j[p];
6114394ed5ebSJunchao Zhang       do {p++;} while (p<s && j[p] == col);
6115394ed5ebSJunchao Zhang       Bnnz++;
6116394ed5ebSJunchao Zhang     }
6117394ed5ebSJunchao Zhang     k = s;
6118394ed5ebSJunchao Zhang   }
6119394ed5ebSJunchao Zhang 
6120394ed5ebSJunchao Zhang   /* Allocation according to Atot, Btot, Annz, Bnnz */
6121158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Atot,&Aperm));
6122158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Btot,&Bperm));
6123158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6124158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6125394ed5ebSJunchao Zhang 
6126394ed5ebSJunchao Zhang   /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6127394ed5ebSJunchao Zhang   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6128394ed5ebSJunchao Zhang   for (r=0; r<m; r++) {
6129394ed5ebSJunchao Zhang     k     = rowBegin[r];
6130394ed5ebSJunchao Zhang     mid   = rowMid[r];
6131394ed5ebSJunchao Zhang     s     = rowEnd[r];
61329566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
61339566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6134394ed5ebSJunchao Zhang     Atot += mid - k;
6135394ed5ebSJunchao Zhang     Btot += s - mid;
6136394ed5ebSJunchao Zhang 
6137394ed5ebSJunchao Zhang     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6138394ed5ebSJunchao Zhang     for (p=k; p<mid;) {
6139394ed5ebSJunchao Zhang       col = j[p];
6140394ed5ebSJunchao Zhang       q   = p;
6141394ed5ebSJunchao Zhang       do {p++;} while (p<mid && j[p] == col);
6142394ed5ebSJunchao Zhang       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6143394ed5ebSJunchao Zhang       Annz++;
6144394ed5ebSJunchao Zhang     }
6145394ed5ebSJunchao Zhang 
6146394ed5ebSJunchao Zhang     for (p=mid; p<s;) {
6147394ed5ebSJunchao Zhang       col = j[p];
6148394ed5ebSJunchao Zhang       q   = p;
6149394ed5ebSJunchao Zhang       do {p++;} while (p<s && j[p] == col);
6150394ed5ebSJunchao Zhang       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6151394ed5ebSJunchao Zhang       Bnnz++;
6152394ed5ebSJunchao Zhang     }
6153394ed5ebSJunchao Zhang   }
6154394ed5ebSJunchao Zhang   /* Output */
6155394ed5ebSJunchao Zhang   *Aperm_ = Aperm;
6156394ed5ebSJunchao Zhang   *Annz_  = Annz;
6157394ed5ebSJunchao Zhang   *Atot_  = Atot;
6158394ed5ebSJunchao Zhang   *Ajmap_ = Ajmap;
6159394ed5ebSJunchao Zhang   *Bperm_ = Bperm;
6160394ed5ebSJunchao Zhang   *Bnnz_  = Bnnz;
6161394ed5ebSJunchao Zhang   *Btot_  = Btot;
6162394ed5ebSJunchao Zhang   *Bjmap_ = Bjmap;
6163394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6164394ed5ebSJunchao Zhang }
6165394ed5ebSJunchao Zhang 
6166158ec288SJunchao Zhang /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6167158ec288SJunchao Zhang 
6168158ec288SJunchao Zhang   Input Parameters:
6169158ec288SJunchao Zhang     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6170158ec288SJunchao Zhang     nnz:  number of unique nonzeros in the merged matrix
6171158ec288SJunchao Zhang     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6172158ec288SJunchao Zhang     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6173158ec288SJunchao Zhang 
6174158ec288SJunchao Zhang   Output Parameter: (memory is allocated by the caller)
6175158ec288SJunchao Zhang     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6176158ec288SJunchao Zhang 
6177158ec288SJunchao Zhang   Example:
6178158ec288SJunchao Zhang     nnz1 = 4
6179158ec288SJunchao Zhang     nnz  = 6
6180158ec288SJunchao Zhang     imap = [1,3,4,5]
6181158ec288SJunchao Zhang     jmap = [0,3,5,6,7]
6182158ec288SJunchao Zhang    then,
6183158ec288SJunchao Zhang     jmap_new = [0,0,3,3,5,6,7]
6184158ec288SJunchao Zhang */
6185158ec288SJunchao Zhang static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6186158ec288SJunchao Zhang {
6187158ec288SJunchao Zhang   PetscCount k,p;
6188158ec288SJunchao Zhang 
6189158ec288SJunchao Zhang   PetscFunctionBegin;
6190158ec288SJunchao Zhang   jmap_new[0] = 0;
6191158ec288SJunchao Zhang   p = nnz; /* p loops over jmap_new[] backwards */
6192158ec288SJunchao Zhang   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6193158ec288SJunchao Zhang     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6194158ec288SJunchao Zhang   }
6195158ec288SJunchao Zhang   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6196158ec288SJunchao Zhang   PetscFunctionReturn(0);
6197158ec288SJunchao Zhang }
6198158ec288SJunchao Zhang 
6199394ed5ebSJunchao Zhang PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6200394ed5ebSJunchao Zhang {
6201394ed5ebSJunchao Zhang   MPI_Comm                  comm;
6202394ed5ebSJunchao Zhang   PetscMPIInt               rank,size;
6203394ed5ebSJunchao Zhang   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6204394ed5ebSJunchao Zhang   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6205394ed5ebSJunchao Zhang   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6206394ed5ebSJunchao Zhang 
6207394ed5ebSJunchao Zhang   PetscFunctionBegin;
62089566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->garray));
62099566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
6210cbc6b225SStefano Zampini #if defined(PETSC_USE_CTABLE)
62119566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6212cbc6b225SStefano Zampini #else
62139566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->colmap));
6214cbc6b225SStefano Zampini #endif
62159566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6216cbc6b225SStefano Zampini   mat->assembled = PETSC_FALSE;
6217cbc6b225SStefano Zampini   mat->was_assembled = PETSC_FALSE;
62189566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6219cbc6b225SStefano Zampini 
62209566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
62219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm,&size));
62229566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm,&rank));
62239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
62249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
62259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
62269566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
62279566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat,&m,&n));
62289566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat,&M,&N));
6229394ed5ebSJunchao Zhang 
6230394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6231394ed5ebSJunchao Zhang   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6232394ed5ebSJunchao Zhang   /* entries come first, then local rows, then remote rows.                     */
6233394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6234394ed5ebSJunchao Zhang   PetscCount n1 = coo_n,*perm1;
6235394ed5ebSJunchao Zhang   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
62369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
62379566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
62389566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(j1,coo_j,n1));
6239394ed5ebSJunchao Zhang   for (k=0; k<n1; k++) perm1[k] = k;
6240394ed5ebSJunchao Zhang 
6241394ed5ebSJunchao Zhang   /* Manipulate indices so that entries with negative row or col indices will have smallest
6242394ed5ebSJunchao Zhang      row indices, local entries will have greater but negative row indices, and remote entries
6243394ed5ebSJunchao Zhang      will have positive row indices.
6244394ed5ebSJunchao Zhang   */
6245394ed5ebSJunchao Zhang   for (k=0; k<n1; k++) {
6246394ed5ebSJunchao Zhang     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6247394ed5ebSJunchao Zhang     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
624854c59aa7SJacob Faibussowitsch     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6249394ed5ebSJunchao Zhang     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6250394ed5ebSJunchao Zhang   }
6251394ed5ebSJunchao Zhang 
6252394ed5ebSJunchao Zhang   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
62539566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6254394ed5ebSJunchao Zhang   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
62559566063dSJacob Faibussowitsch   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6256394ed5ebSJunchao Zhang   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6257394ed5ebSJunchao Zhang 
6258394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6259394ed5ebSJunchao Zhang   /*           Split local rows into diag/offdiag portions                      */
6260394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6261394ed5ebSJunchao Zhang   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6262394ed5ebSJunchao Zhang   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6263394ed5ebSJunchao Zhang   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6264394ed5ebSJunchao Zhang 
62659566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
62669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
62679566063dSJacob Faibussowitsch   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6268394ed5ebSJunchao Zhang 
6269394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6270394ed5ebSJunchao Zhang   /*           Send remote rows to their owner                                  */
6271394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6272394ed5ebSJunchao Zhang   /* Find which rows should be sent to which remote ranks*/
6273394ed5ebSJunchao Zhang   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6274394ed5ebSJunchao Zhang   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6275394ed5ebSJunchao Zhang   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6276394ed5ebSJunchao Zhang   const PetscInt *ranges;
6277394ed5ebSJunchao Zhang   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6278394ed5ebSJunchao Zhang 
62799566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
62809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6281394ed5ebSJunchao Zhang   for (k=rem; k<n1;) {
6282394ed5ebSJunchao Zhang     PetscMPIInt  owner;
6283394ed5ebSJunchao Zhang     PetscInt     firstRow,lastRow;
6284cbc6b225SStefano Zampini 
6285394ed5ebSJunchao Zhang     /* Locate a row range */
6286394ed5ebSJunchao Zhang     firstRow = i1[k]; /* first row of this owner */
62879566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6288394ed5ebSJunchao Zhang     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6289394ed5ebSJunchao Zhang 
6290394ed5ebSJunchao Zhang     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
62919566063dSJacob Faibussowitsch     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6292394ed5ebSJunchao Zhang 
6293394ed5ebSJunchao Zhang     /* All entries in [k,p) belong to this remote owner */
6294394ed5ebSJunchao Zhang     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6295394ed5ebSJunchao Zhang       PetscMPIInt *sendto2;
6296394ed5ebSJunchao Zhang       PetscInt    *nentries2;
6297394ed5ebSJunchao Zhang       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6298cbc6b225SStefano Zampini 
62999566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
63009566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
63019566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
63029566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sendto,nentries2));
6303394ed5ebSJunchao Zhang       sendto      = sendto2;
6304394ed5ebSJunchao Zhang       nentries    = nentries2;
6305394ed5ebSJunchao Zhang       maxNsend    = maxNsend2;
6306394ed5ebSJunchao Zhang     }
6307394ed5ebSJunchao Zhang     sendto[nsend]   = owner;
6308394ed5ebSJunchao Zhang     nentries[nsend] = p - k;
63099566063dSJacob Faibussowitsch     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6310394ed5ebSJunchao Zhang     nsend++;
6311394ed5ebSJunchao Zhang     k = p;
6312394ed5ebSJunchao Zhang   }
6313394ed5ebSJunchao Zhang 
6314394ed5ebSJunchao Zhang   /* Build 1st SF to know offsets on remote to send data */
6315394ed5ebSJunchao Zhang   PetscSF     sf1;
6316394ed5ebSJunchao Zhang   PetscInt    nroots = 1,nroots2 = 0;
6317394ed5ebSJunchao Zhang   PetscInt    nleaves = nsend,nleaves2 = 0;
6318394ed5ebSJunchao Zhang   PetscInt    *offsets;
6319394ed5ebSJunchao Zhang   PetscSFNode *iremote;
6320394ed5ebSJunchao Zhang 
63219566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm,&sf1));
63229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend,&iremote));
63239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend,&offsets));
6324394ed5ebSJunchao Zhang   for (k=0; k<nsend; k++) {
6325394ed5ebSJunchao Zhang     iremote[k].rank  = sendto[k];
6326394ed5ebSJunchao Zhang     iremote[k].index = 0;
6327394ed5ebSJunchao Zhang     nleaves2        += nentries[k];
632854c59aa7SJacob Faibussowitsch     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6329394ed5ebSJunchao Zhang   }
63309566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
63319566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
63329566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
63339566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf1));
633463a3b9bcSJacob Faibussowitsch   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6335394ed5ebSJunchao Zhang 
6336394ed5ebSJunchao Zhang   /* Build 2nd SF to send remote COOs to their owner */
6337394ed5ebSJunchao Zhang   PetscSF sf2;
6338394ed5ebSJunchao Zhang   nroots  = nroots2;
6339394ed5ebSJunchao Zhang   nleaves = nleaves2;
63409566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm,&sf2));
63419566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf2));
63429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves,&iremote));
6343394ed5ebSJunchao Zhang   p       = 0;
6344394ed5ebSJunchao Zhang   for (k=0; k<nsend; k++) {
634554c59aa7SJacob Faibussowitsch     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6346394ed5ebSJunchao Zhang     for (q=0; q<nentries[k]; q++,p++) {
6347394ed5ebSJunchao Zhang       iremote[p].rank  = sendto[k];
6348394ed5ebSJunchao Zhang       iremote[p].index = offsets[k] + q;
6349394ed5ebSJunchao Zhang     }
6350394ed5ebSJunchao Zhang   }
63519566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6352394ed5ebSJunchao Zhang 
6353394ed5ebSJunchao Zhang   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
63549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6355394ed5ebSJunchao Zhang 
6356394ed5ebSJunchao Zhang   /* Send the remote COOs to their owner */
6357394ed5ebSJunchao Zhang   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6358394ed5ebSJunchao Zhang   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
63599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
63609566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
63619566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
63629566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
63639566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6364394ed5ebSJunchao Zhang 
63659566063dSJacob Faibussowitsch   PetscCall(PetscFree(offsets));
63669566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sendto,nentries));
6367394ed5ebSJunchao Zhang 
6368394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6369394ed5ebSJunchao Zhang   /* Sort received COOs by row along with the permutation array     */
6370394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6371394ed5ebSJunchao Zhang   for (k=0; k<n2; k++) perm2[k] = k;
63729566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6373394ed5ebSJunchao Zhang 
6374394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6375394ed5ebSJunchao Zhang   /* Split received COOs into diag/offdiag portions                 */
6376394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6377394ed5ebSJunchao Zhang   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6378394ed5ebSJunchao Zhang   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6379394ed5ebSJunchao Zhang   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6380394ed5ebSJunchao Zhang 
63819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
63829566063dSJacob Faibussowitsch   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6383394ed5ebSJunchao Zhang 
6384394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------*/
6385394ed5ebSJunchao Zhang   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6386394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------*/
6387394ed5ebSJunchao Zhang   PetscInt   *Ai,*Bi;
6388394ed5ebSJunchao Zhang   PetscInt   *Aj,*Bj;
6389394ed5ebSJunchao Zhang 
63909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m+1,&Ai));
63919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m+1,&Bi));
63929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
63939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6394394ed5ebSJunchao Zhang 
6395394ed5ebSJunchao Zhang   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6396158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6397158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6398158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6399158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6400394ed5ebSJunchao Zhang 
64019566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
64029566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6403158ec288SJunchao Zhang 
6404158ec288SJunchao Zhang   /* --------------------------------------------------------------------------*/
6405158ec288SJunchao Zhang   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6406158ec288SJunchao Zhang   /* expect nonzeros in A/B most likely have local contributing entries        */
6407158ec288SJunchao Zhang   /* --------------------------------------------------------------------------*/
6408158ec288SJunchao Zhang   PetscInt Annz = Ai[m];
6409158ec288SJunchao Zhang   PetscInt Bnnz = Bi[m];
6410158ec288SJunchao Zhang   PetscCount *Ajmap1_new,*Bjmap1_new;
6411158ec288SJunchao Zhang 
6412158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6413158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6414158ec288SJunchao Zhang 
6415158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6416158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6417158ec288SJunchao Zhang 
6418158ec288SJunchao Zhang   PetscCall(PetscFree(Aimap1));
6419158ec288SJunchao Zhang   PetscCall(PetscFree(Ajmap1));
6420158ec288SJunchao Zhang   PetscCall(PetscFree(Bimap1));
6421158ec288SJunchao Zhang   PetscCall(PetscFree(Bjmap1));
64229566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
64239566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
64249566063dSJacob Faibussowitsch   PetscCall(PetscFree3(i1,j1,perm1));
64259566063dSJacob Faibussowitsch   PetscCall(PetscFree3(i2,j2,perm2));
6426394ed5ebSJunchao Zhang 
6427158ec288SJunchao Zhang   Ajmap1 = Ajmap1_new;
6428158ec288SJunchao Zhang   Bjmap1 = Bjmap1_new;
6429158ec288SJunchao Zhang 
6430394ed5ebSJunchao Zhang   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6431394ed5ebSJunchao Zhang   if (Annz < Annz1 + Annz2) {
6432394ed5ebSJunchao Zhang     PetscInt *Aj_new;
64339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Annz,&Aj_new));
64349566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
64359566063dSJacob Faibussowitsch     PetscCall(PetscFree(Aj));
6436394ed5ebSJunchao Zhang     Aj   = Aj_new;
6437394ed5ebSJunchao Zhang   }
6438394ed5ebSJunchao Zhang 
6439394ed5ebSJunchao Zhang   if (Bnnz < Bnnz1 + Bnnz2) {
6440394ed5ebSJunchao Zhang     PetscInt *Bj_new;
64419566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
64429566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
64439566063dSJacob Faibussowitsch     PetscCall(PetscFree(Bj));
6444394ed5ebSJunchao Zhang     Bj   = Bj_new;
6445394ed5ebSJunchao Zhang   }
6446394ed5ebSJunchao Zhang 
6447394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------------*/
6448cbc6b225SStefano Zampini   /* Create new submatrices for on-process and off-process coupling                  */
6449394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------------*/
6450394ed5ebSJunchao Zhang   PetscScalar   *Aa,*Ba;
6451cbc6b225SStefano Zampini   MatType       rtype;
6452394ed5ebSJunchao Zhang   Mat_SeqAIJ    *a,*b;
64539566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
64549566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Bnnz,&Ba));
6455394ed5ebSJunchao Zhang   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6456394ed5ebSJunchao Zhang   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
64579566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mpiaij->A));
64589566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mpiaij->B));
64599566063dSJacob Faibussowitsch   PetscCall(MatGetRootType_Private(mat,&rtype));
64609566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
64619566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
64629566063dSJacob Faibussowitsch   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6463cbc6b225SStefano Zampini 
6464394ed5ebSJunchao Zhang   a = (Mat_SeqAIJ*)mpiaij->A->data;
6465394ed5ebSJunchao Zhang   b = (Mat_SeqAIJ*)mpiaij->B->data;
6466394ed5ebSJunchao Zhang   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6467394ed5ebSJunchao Zhang   a->free_a       = b->free_a       = PETSC_TRUE;
6468394ed5ebSJunchao Zhang   a->free_ij      = b->free_ij      = PETSC_TRUE;
6469394ed5ebSJunchao Zhang 
6470cbc6b225SStefano Zampini   /* conversion must happen AFTER multiply setup */
64719566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
64729566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
64739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
64749566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
64759566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6476cbc6b225SStefano Zampini 
6477394ed5ebSJunchao Zhang   mpiaij->coo_n   = coo_n;
6478394ed5ebSJunchao Zhang   mpiaij->coo_sf  = sf2;
6479394ed5ebSJunchao Zhang   mpiaij->sendlen = nleaves;
6480394ed5ebSJunchao Zhang   mpiaij->recvlen = nroots;
6481394ed5ebSJunchao Zhang 
6482158ec288SJunchao Zhang   mpiaij->Annz    = Annz;
6483158ec288SJunchao Zhang   mpiaij->Bnnz    = Bnnz;
6484158ec288SJunchao Zhang 
6485394ed5ebSJunchao Zhang   mpiaij->Annz2   = Annz2;
6486394ed5ebSJunchao Zhang   mpiaij->Bnnz2   = Bnnz2;
6487394ed5ebSJunchao Zhang 
6488394ed5ebSJunchao Zhang   mpiaij->Atot1   = Atot1;
6489394ed5ebSJunchao Zhang   mpiaij->Atot2   = Atot2;
6490394ed5ebSJunchao Zhang   mpiaij->Btot1   = Btot1;
6491394ed5ebSJunchao Zhang   mpiaij->Btot2   = Btot2;
6492394ed5ebSJunchao Zhang 
6493394ed5ebSJunchao Zhang   mpiaij->Ajmap1  = Ajmap1;
6494394ed5ebSJunchao Zhang   mpiaij->Aperm1  = Aperm1;
6495158ec288SJunchao Zhang 
6496158ec288SJunchao Zhang   mpiaij->Bjmap1  = Bjmap1;
6497394ed5ebSJunchao Zhang   mpiaij->Bperm1  = Bperm1;
6498158ec288SJunchao Zhang 
6499158ec288SJunchao Zhang   mpiaij->Aimap2  = Aimap2;
6500158ec288SJunchao Zhang   mpiaij->Ajmap2  = Ajmap2;
6501158ec288SJunchao Zhang   mpiaij->Aperm2  = Aperm2;
6502158ec288SJunchao Zhang 
6503158ec288SJunchao Zhang   mpiaij->Bimap2  = Bimap2;
6504158ec288SJunchao Zhang   mpiaij->Bjmap2  = Bjmap2;
6505394ed5ebSJunchao Zhang   mpiaij->Bperm2  = Bperm2;
6506394ed5ebSJunchao Zhang 
6507394ed5ebSJunchao Zhang   mpiaij->Cperm1  = Cperm1;
6508394ed5ebSJunchao Zhang 
6509394ed5ebSJunchao Zhang   /* Allocate in preallocation. If not used, it has zero cost on host */
65109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6511394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6512394ed5ebSJunchao Zhang }
6513394ed5ebSJunchao Zhang 
6514394ed5ebSJunchao Zhang static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6515394ed5ebSJunchao Zhang {
6516394ed5ebSJunchao Zhang   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6517394ed5ebSJunchao Zhang   Mat                  A = mpiaij->A,B = mpiaij->B;
6518158ec288SJunchao Zhang   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6519394ed5ebSJunchao Zhang   PetscScalar          *Aa,*Ba;
6520394ed5ebSJunchao Zhang   PetscScalar          *sendbuf = mpiaij->sendbuf;
6521394ed5ebSJunchao Zhang   PetscScalar          *recvbuf = mpiaij->recvbuf;
6522158ec288SJunchao Zhang   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6523158ec288SJunchao Zhang   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6524394ed5ebSJunchao Zhang   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6525394ed5ebSJunchao Zhang   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6526394ed5ebSJunchao Zhang 
6527394ed5ebSJunchao Zhang   PetscFunctionBegin;
65289566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
65299566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B,&Ba));
6530394ed5ebSJunchao Zhang 
6531394ed5ebSJunchao Zhang   /* Pack entries to be sent to remote */
6532394ed5ebSJunchao Zhang   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6533394ed5ebSJunchao Zhang 
6534394ed5ebSJunchao Zhang   /* Send remote entries to their owner and overlap the communication with local computation */
65359566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6536394ed5ebSJunchao Zhang   /* Add local entries to A and B */
6537158ec288SJunchao Zhang   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6538158ec288SJunchao Zhang     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */
6539158ec288SJunchao Zhang     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6540158ec288SJunchao Zhang     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6541394ed5ebSJunchao Zhang   }
6542158ec288SJunchao Zhang   for (PetscCount i=0; i<Bnnz; i++) {
6543158ec288SJunchao Zhang     PetscScalar sum = 0.0;
6544158ec288SJunchao Zhang     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6545158ec288SJunchao Zhang     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6546394ed5ebSJunchao Zhang   }
65479566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6548394ed5ebSJunchao Zhang 
6549394ed5ebSJunchao Zhang   /* Add received remote entries to A and B */
6550394ed5ebSJunchao Zhang   for (PetscCount i=0; i<Annz2; i++) {
6551394ed5ebSJunchao Zhang     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6552394ed5ebSJunchao Zhang   }
6553394ed5ebSJunchao Zhang   for (PetscCount i=0; i<Bnnz2; i++) {
6554394ed5ebSJunchao Zhang     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6555394ed5ebSJunchao Zhang   }
65569566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
65579566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6558394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6559394ed5ebSJunchao Zhang }
6560394ed5ebSJunchao Zhang 
65614222ddf1SHong Zhang /* ----------------------------------------------------------------*/
65624222ddf1SHong Zhang 
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
6581ccd8e176SBarry Smith 
65828cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6583ccd8e176SBarry Smith {
6584ccd8e176SBarry Smith   Mat_MPIAIJ     *b;
6585ccd8e176SBarry Smith   PetscMPIInt    size;
6586ccd8e176SBarry Smith 
6587ccd8e176SBarry Smith   PetscFunctionBegin;
65889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
65892205254eSKarl Rupp 
65909566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B,&b));
6591ccd8e176SBarry Smith   B->data       = (void*)b;
65929566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6593ccd8e176SBarry Smith   B->assembled  = PETSC_FALSE;
6594ccd8e176SBarry Smith   B->insertmode = NOT_SET_VALUES;
6595ccd8e176SBarry Smith   b->size       = size;
65962205254eSKarl Rupp 
65979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6598ccd8e176SBarry Smith 
6599ccd8e176SBarry Smith   /* build cache for off array entries formed */
66009566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
66012205254eSKarl Rupp 
6602ccd8e176SBarry Smith   b->donotstash  = PETSC_FALSE;
6603f4259b30SLisandro Dalcin   b->colmap      = NULL;
6604f4259b30SLisandro Dalcin   b->garray      = NULL;
6605ccd8e176SBarry Smith   b->roworiented = PETSC_TRUE;
6606ccd8e176SBarry Smith 
6607ccd8e176SBarry Smith   /* stuff used for matrix vector multiply */
66080298fd71SBarry Smith   b->lvec  = NULL;
66090298fd71SBarry Smith   b->Mvctx = NULL;
6610ccd8e176SBarry Smith 
6611ccd8e176SBarry Smith   /* stuff for MatGetRow() */
6612f4259b30SLisandro Dalcin   b->rowindices   = NULL;
6613f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
6614ccd8e176SBarry Smith   b->getrowactive = PETSC_FALSE;
6615ccd8e176SBarry Smith 
6616f719121fSJed Brown   /* flexible pointer used in CUSPARSE classes */
66170298fd71SBarry Smith   b->spptr = NULL;
6618f60c3dc2SHong Zhang 
66199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
66209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
66219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
66229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
66239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
66249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
66259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
66269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
66279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
66289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
66293d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA)
66309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
66313d0639e7SStefano Zampini #endif
66323d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
66339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
66343d0639e7SStefano Zampini #endif
66359779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
66369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6637191b95cbSRichard Tran Mills #endif
66389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
66399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
66409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
66419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
66425d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
66439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
66445d7652ecSHong Zhang #endif
6645d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
66469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6647d24d4204SJose E. Roman #endif
66489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
66499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
66503dad0653Sstefano_zampini #if defined(PETSC_HAVE_HYPRE)
66519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
66529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
66533dad0653Sstefano_zampini #endif
66549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
66559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
66569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
66579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
66589566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6659ccd8e176SBarry Smith   PetscFunctionReturn(0);
6660ccd8e176SBarry Smith }
666181824310SBarry Smith 
6662cce60c4dSBarry Smith /*@C
666303bfb495SBarry Smith      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
666403bfb495SBarry Smith          and "off-diagonal" part of the matrix in CSR format.
666503bfb495SBarry Smith 
6666d083f849SBarry Smith    Collective
666703bfb495SBarry Smith 
666803bfb495SBarry Smith    Input Parameters:
666903bfb495SBarry Smith +  comm - MPI communicator
667003bfb495SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
667103bfb495SBarry Smith .  n - This value should be the same as the local size used in creating the
667203bfb495SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
667303bfb495SBarry Smith        calculated if N is given) For square matrices n is almost always m.
667403bfb495SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
667503bfb495SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6676483a2f95SBarry Smith .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
667704ccdda3SJunchao Zhang .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
667803bfb495SBarry Smith .   a - matrix values
6679483a2f95SBarry Smith .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
668004ccdda3SJunchao Zhang .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
668103bfb495SBarry Smith -   oa - matrix values
668203bfb495SBarry Smith 
668303bfb495SBarry Smith    Output Parameter:
668403bfb495SBarry Smith .   mat - the matrix
668503bfb495SBarry Smith 
668603bfb495SBarry Smith    Level: advanced
668703bfb495SBarry Smith 
668803bfb495SBarry Smith    Notes:
6689292fb18eSBarry Smith        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6690292fb18eSBarry Smith        must free the arrays once the matrix has been destroyed and not before.
669103bfb495SBarry Smith 
669203bfb495SBarry Smith        The i and j indices are 0 based
669303bfb495SBarry Smith 
669469b1f4b7SBarry Smith        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
669503bfb495SBarry Smith 
66967b55108eSBarry Smith        This sets local rows and cannot be used to set off-processor values.
66977b55108eSBarry Smith 
6698dca341c0SJed Brown        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6699dca341c0SJed Brown        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6700dca341c0SJed Brown        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6701dca341c0SJed Brown        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6702eeb24464SBarry Smith        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6703dca341c0SJed Brown        communication if it is known that only local entries will be set.
670403bfb495SBarry Smith 
670503bfb495SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
67065f4d30c4SBarry Smith           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
67072b26979fSBarry Smith @*/
67082205254eSKarl Rupp PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
670903bfb495SBarry Smith {
671003bfb495SBarry Smith   Mat_MPIAIJ     *maij;
671103bfb495SBarry Smith 
671203bfb495SBarry Smith   PetscFunctionBegin;
671308401ef6SPierre Jolivet   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6714*aed4548fSBarry Smith   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6715*aed4548fSBarry Smith   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
67169566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,mat));
67179566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat,m,n,M,N));
67189566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat,MATMPIAIJ));
671903bfb495SBarry Smith   maij = (Mat_MPIAIJ*) (*mat)->data;
67202205254eSKarl Rupp 
67218d7a6e47SBarry Smith   (*mat)->preallocated = PETSC_TRUE;
672203bfb495SBarry Smith 
67239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
67249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
672503bfb495SBarry Smith 
67269566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
67279566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
672803bfb495SBarry Smith 
67299566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
67309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
67319566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
67329566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
67339566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
673403bfb495SBarry Smith   PetscFunctionReturn(0);
673503bfb495SBarry Smith }
673603bfb495SBarry Smith 
67374e84afc0SStefano Zampini typedef struct {
67384e84afc0SStefano Zampini   Mat       *mp;    /* intermediate products */
67394e84afc0SStefano Zampini   PetscBool *mptmp; /* is the intermediate product temporary ? */
67404e84afc0SStefano Zampini   PetscInt  cp;     /* number of intermediate products */
67414e84afc0SStefano Zampini 
67424e84afc0SStefano Zampini   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
67434e84afc0SStefano Zampini   PetscInt    *startsj_s,*startsj_r;
67444e84afc0SStefano Zampini   PetscScalar *bufa;
67454e84afc0SStefano Zampini   Mat         P_oth;
67464e84afc0SStefano Zampini 
67474e84afc0SStefano Zampini   /* may take advantage of merging product->B */
6748ddea5d60SJunchao Zhang   Mat Bloc; /* B-local by merging diag and off-diag */
67494e84afc0SStefano Zampini 
6750ddea5d60SJunchao Zhang   /* cusparse does not have support to split between symbolic and numeric phases.
67514e84afc0SStefano Zampini      When api_user is true, we don't need to update the numerical values
67524e84afc0SStefano Zampini      of the temporary storage */
67534e84afc0SStefano Zampini   PetscBool reusesym;
67544e84afc0SStefano Zampini 
67554e84afc0SStefano Zampini   /* support for COO values insertion */
6756ddea5d60SJunchao Zhang   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6757ddea5d60SJunchao Zhang   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6758ddea5d60SJunchao Zhang   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6759ddea5d60SJunchao Zhang   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6760c215019aSStefano Zampini   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6761c215019aSStefano Zampini   PetscMemType mtype;
67624e84afc0SStefano Zampini 
67634e84afc0SStefano Zampini   /* customization */
67644e84afc0SStefano Zampini   PetscBool abmerge;
6765abb89eb1SStefano Zampini   PetscBool P_oth_bind;
67664e84afc0SStefano Zampini } MatMatMPIAIJBACKEND;
67674e84afc0SStefano Zampini 
67684e84afc0SStefano Zampini PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
67694e84afc0SStefano Zampini {
67704e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
67714e84afc0SStefano Zampini   PetscInt            i;
67724e84afc0SStefano Zampini 
67734e84afc0SStefano Zampini   PetscFunctionBegin;
67749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
67759566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->bufa));
67769566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
67779566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
67789566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->P_oth));
67799566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->Bloc));
67809566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mmdata->sf));
67814e84afc0SStefano Zampini   for (i = 0; i < mmdata->cp; i++) {
67829566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&mmdata->mp[i]));
67834e84afc0SStefano Zampini   }
67849566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
67859566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own[0]));
67869566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own));
67879566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off[0]));
67889566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off));
67899566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata));
67904e84afc0SStefano Zampini   PetscFunctionReturn(0);
67914e84afc0SStefano Zampini }
67924e84afc0SStefano Zampini 
6793fff043a9SJunchao Zhang /* Copy selected n entries with indices in idx[] of A to v[].
6794fff043a9SJunchao Zhang    If idx is NULL, copy the whole data array of A to v[]
6795fff043a9SJunchao Zhang  */
6796c215019aSStefano Zampini static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6797c215019aSStefano Zampini {
6798c215019aSStefano Zampini   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6799c215019aSStefano Zampini 
6800c215019aSStefano Zampini   PetscFunctionBegin;
68019566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6802c215019aSStefano Zampini   if (f) {
68039566063dSJacob Faibussowitsch     PetscCall((*f)(A,n,idx,v));
6804c215019aSStefano Zampini   } else {
6805c215019aSStefano Zampini     const PetscScalar *vv;
6806c215019aSStefano Zampini 
68079566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6808c215019aSStefano Zampini     if (n && idx) {
6809c215019aSStefano Zampini       PetscScalar    *w = v;
6810c215019aSStefano Zampini       const PetscInt *oi = idx;
6811c215019aSStefano Zampini       PetscInt       j;
6812c215019aSStefano Zampini 
6813c215019aSStefano Zampini       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6814c215019aSStefano Zampini     } else {
68159566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(v,vv,n));
6816c215019aSStefano Zampini     }
68179566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6818c215019aSStefano Zampini   }
6819c215019aSStefano Zampini   PetscFunctionReturn(0);
6820c215019aSStefano Zampini }
6821c215019aSStefano Zampini 
68224e84afc0SStefano Zampini static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
68234e84afc0SStefano Zampini {
68244e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata;
68254e84afc0SStefano Zampini   PetscInt            i,n_d,n_o;
68264e84afc0SStefano Zampini 
68274e84afc0SStefano Zampini   PetscFunctionBegin;
68284e84afc0SStefano Zampini   MatCheckProduct(C,1);
682928b400f6SJacob Faibussowitsch   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
68304e84afc0SStefano Zampini   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
68314e84afc0SStefano Zampini   if (!mmdata->reusesym) { /* update temporary matrices */
68324e84afc0SStefano Zampini     if (mmdata->P_oth) {
68339566063dSJacob Faibussowitsch       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
68344e84afc0SStefano Zampini     }
68354e84afc0SStefano Zampini     if (mmdata->Bloc) {
68369566063dSJacob Faibussowitsch       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
68374e84afc0SStefano Zampini     }
68384e84afc0SStefano Zampini   }
68394e84afc0SStefano Zampini   mmdata->reusesym = PETSC_FALSE;
6840abb89eb1SStefano Zampini 
6841abb89eb1SStefano Zampini   for (i = 0; i < mmdata->cp; i++) {
684208401ef6SPierre Jolivet     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
68439566063dSJacob Faibussowitsch     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6844abb89eb1SStefano Zampini   }
68454e84afc0SStefano Zampini   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
68464e84afc0SStefano Zampini     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
68474e84afc0SStefano Zampini 
68484e84afc0SStefano Zampini     if (mmdata->mptmp[i]) continue;
68494e84afc0SStefano Zampini     if (noff) {
6850c215019aSStefano Zampini       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6851c215019aSStefano Zampini 
68529566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
68539566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
68544e84afc0SStefano Zampini       n_o += noff;
68554e84afc0SStefano Zampini       n_d += nown;
68564e84afc0SStefano Zampini     } else {
6857c215019aSStefano Zampini       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6858c215019aSStefano Zampini 
68599566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
68604e84afc0SStefano Zampini       n_d += mm->nz;
68614e84afc0SStefano Zampini     }
68624e84afc0SStefano Zampini   }
6863c215019aSStefano Zampini   if (mmdata->hasoffproc) { /* offprocess insertion */
68649566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
68659566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
68664e84afc0SStefano Zampini   }
68679566063dSJacob Faibussowitsch   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
68684e84afc0SStefano Zampini   PetscFunctionReturn(0);
68694e84afc0SStefano Zampini }
68704e84afc0SStefano Zampini 
68714e84afc0SStefano Zampini /* Support for Pt * A, A * P, or Pt * A * P */
68724e84afc0SStefano Zampini #define MAX_NUMBER_INTERMEDIATE 4
68734e84afc0SStefano Zampini PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
68744e84afc0SStefano Zampini {
68754e84afc0SStefano Zampini   Mat_Product            *product = C->product;
6876ddea5d60SJunchao Zhang   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
68774e84afc0SStefano Zampini   Mat_MPIAIJ             *a,*p;
68784e84afc0SStefano Zampini   MatMatMPIAIJBACKEND    *mmdata;
68794e84afc0SStefano Zampini   ISLocalToGlobalMapping P_oth_l2g = NULL;
68804e84afc0SStefano Zampini   IS                     glob = NULL;
68814e84afc0SStefano Zampini   const char             *prefix;
68824e84afc0SStefano Zampini   char                   pprefix[256];
68834e84afc0SStefano Zampini   const PetscInt         *globidx,*P_oth_idx;
688482a78a4eSJed Brown   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
688582a78a4eSJed Brown   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6886ddea5d60SJunchao Zhang   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6887ddea5d60SJunchao Zhang                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6888ddea5d60SJunchao Zhang                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6889ddea5d60SJunchao Zhang   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6890ddea5d60SJunchao Zhang 
68914e84afc0SStefano Zampini   MatProductType         ptype;
6892c215019aSStefano Zampini   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
68934e84afc0SStefano Zampini   PetscMPIInt            size;
68944e84afc0SStefano Zampini 
68954e84afc0SStefano Zampini   PetscFunctionBegin;
68964e84afc0SStefano Zampini   MatCheckProduct(C,1);
689728b400f6SJacob Faibussowitsch   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
68984e84afc0SStefano Zampini   ptype = product->type;
6899fa046f9fSJunchao Zhang   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6900fa046f9fSJunchao Zhang     ptype = MATPRODUCT_AB;
6901fa046f9fSJunchao Zhang     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6902fa046f9fSJunchao Zhang   }
69034e84afc0SStefano Zampini   switch (ptype) {
69044e84afc0SStefano Zampini   case MATPRODUCT_AB:
69054e84afc0SStefano Zampini     A = product->A;
69064e84afc0SStefano Zampini     P = product->B;
69074e84afc0SStefano Zampini     m = A->rmap->n;
69084e84afc0SStefano Zampini     n = P->cmap->n;
69094e84afc0SStefano Zampini     M = A->rmap->N;
69104e84afc0SStefano Zampini     N = P->cmap->N;
6911ddea5d60SJunchao Zhang     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
69124e84afc0SStefano Zampini     break;
69134e84afc0SStefano Zampini   case MATPRODUCT_AtB:
69144e84afc0SStefano Zampini     P = product->A;
69154e84afc0SStefano Zampini     A = product->B;
69164e84afc0SStefano Zampini     m = P->cmap->n;
69174e84afc0SStefano Zampini     n = A->cmap->n;
69184e84afc0SStefano Zampini     M = P->cmap->N;
69194e84afc0SStefano Zampini     N = A->cmap->N;
69204e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
69214e84afc0SStefano Zampini     break;
69224e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
69234e84afc0SStefano Zampini     A = product->A;
69244e84afc0SStefano Zampini     P = product->B;
69254e84afc0SStefano Zampini     m = P->cmap->n;
69264e84afc0SStefano Zampini     n = P->cmap->n;
69274e84afc0SStefano Zampini     M = P->cmap->N;
69284e84afc0SStefano Zampini     N = P->cmap->N;
69294e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
69304e84afc0SStefano Zampini     break;
69314e84afc0SStefano Zampini   default:
693298921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
69334e84afc0SStefano Zampini   }
69349566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
69354e84afc0SStefano Zampini   if (size == 1) hasoffproc = PETSC_FALSE;
69364e84afc0SStefano Zampini 
69374e84afc0SStefano Zampini   /* defaults */
69384e84afc0SStefano Zampini   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
69394e84afc0SStefano Zampini     mp[i]    = NULL;
69404e84afc0SStefano Zampini     mptmp[i] = PETSC_FALSE;
69414e84afc0SStefano Zampini     rmapt[i] = -1;
69424e84afc0SStefano Zampini     cmapt[i] = -1;
69434e84afc0SStefano Zampini     rmapa[i] = NULL;
69444e84afc0SStefano Zampini     cmapa[i] = NULL;
69454e84afc0SStefano Zampini   }
69464e84afc0SStefano Zampini 
69474e84afc0SStefano Zampini   /* customization */
69489566063dSJacob Faibussowitsch   PetscCall(PetscNew(&mmdata));
69494e84afc0SStefano Zampini   mmdata->reusesym = product->api_user;
69504e84afc0SStefano Zampini   if (ptype == MATPRODUCT_AB) {
69514e84afc0SStefano Zampini     if (product->api_user) {
6952d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
69539566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
69549566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6955d0609cedSBarry Smith       PetscOptionsEnd();
69564e84afc0SStefano Zampini     } else {
6957d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
69589566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
69599566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6960d0609cedSBarry Smith       PetscOptionsEnd();
6961abb89eb1SStefano Zampini     }
6962abb89eb1SStefano Zampini   } else if (ptype == MATPRODUCT_PtAP) {
6963abb89eb1SStefano Zampini     if (product->api_user) {
6964d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
69659566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6966d0609cedSBarry Smith       PetscOptionsEnd();
6967abb89eb1SStefano Zampini     } else {
6968d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
69699566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6970d0609cedSBarry Smith       PetscOptionsEnd();
69714e84afc0SStefano Zampini     }
69724e84afc0SStefano Zampini   }
69734e84afc0SStefano Zampini   a = (Mat_MPIAIJ*)A->data;
69744e84afc0SStefano Zampini   p = (Mat_MPIAIJ*)P->data;
69759566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C,m,n,M,N));
69769566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->rmap));
69779566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->cmap));
69789566063dSJacob Faibussowitsch   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
69799566063dSJacob Faibussowitsch   PetscCall(MatGetOptionsPrefix(C,&prefix));
6980ddea5d60SJunchao Zhang 
6981ddea5d60SJunchao Zhang   cp   = 0;
69824e84afc0SStefano Zampini   switch (ptype) {
69834e84afc0SStefano Zampini   case MATPRODUCT_AB: /* A * P */
69849566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
69854e84afc0SStefano Zampini 
6986ddea5d60SJunchao Zhang     /* A_diag * P_local (merged or not) */
6987ddea5d60SJunchao Zhang     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
69884e84afc0SStefano Zampini       /* P is product->B */
69899566063dSJacob Faibussowitsch       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
69909566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
69919566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
69929566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
69939566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
69949566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
69959566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
69964e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
69979566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
699808401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
69999566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70009566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob,&globidx));
70014e84afc0SStefano Zampini       rmapt[cp] = 1;
70024e84afc0SStefano Zampini       cmapt[cp] = 2;
70034e84afc0SStefano Zampini       cmapa[cp] = globidx;
70044e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70054e84afc0SStefano Zampini       cp++;
7006ddea5d60SJunchao Zhang     } else { /* A_diag * P_diag and A_diag * P_off */
70079566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
70089566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
70099566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
70109566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
70119566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
70129566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
70134e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70149566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
701508401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
70169566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70174e84afc0SStefano Zampini       rmapt[cp] = 1;
70184e84afc0SStefano Zampini       cmapt[cp] = 1;
70194e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70204e84afc0SStefano Zampini       cp++;
70219566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
70229566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
70239566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
70249566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
70259566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
70269566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
70274e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70289566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
702908401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
70309566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70314e84afc0SStefano Zampini       rmapt[cp] = 1;
70324e84afc0SStefano Zampini       cmapt[cp] = 2;
70334e84afc0SStefano Zampini       cmapa[cp] = p->garray;
70344e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70354e84afc0SStefano Zampini       cp++;
70364e84afc0SStefano Zampini     }
7037ddea5d60SJunchao Zhang 
7038ddea5d60SJunchao Zhang     /* A_off * P_other */
70394e84afc0SStefano Zampini     if (mmdata->P_oth) {
70409566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
70419566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
70429566063dSJacob Faibussowitsch       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
70439566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
70449566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
70459566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
70469566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
70479566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
70489566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
70499566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
70504e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70519566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
705208401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
70539566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70544e84afc0SStefano Zampini       rmapt[cp] = 1;
70554e84afc0SStefano Zampini       cmapt[cp] = 2;
70564e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
70574e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70584e84afc0SStefano Zampini       cp++;
70594e84afc0SStefano Zampini     }
70604e84afc0SStefano Zampini     break;
7061ddea5d60SJunchao Zhang 
70624e84afc0SStefano Zampini   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
70634e84afc0SStefano Zampini     /* A is product->B */
70649566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7065ddea5d60SJunchao Zhang     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
70669566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
70679566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
70689566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
70699566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
70709566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
70719566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
70724e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70739566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
707408401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
70759566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70769566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob,&globidx));
70774e84afc0SStefano Zampini       rmapt[cp] = 2;
70784e84afc0SStefano Zampini       rmapa[cp] = globidx;
70794e84afc0SStefano Zampini       cmapt[cp] = 2;
70804e84afc0SStefano Zampini       cmapa[cp] = globidx;
70814e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70824e84afc0SStefano Zampini       cp++;
70834e84afc0SStefano Zampini     } else {
70849566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
70859566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
70869566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
70879566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
70889566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
70899566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
70904e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70919566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
709208401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
70939566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70949566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob,&globidx));
70954e84afc0SStefano Zampini       rmapt[cp] = 1;
70964e84afc0SStefano Zampini       cmapt[cp] = 2;
70974e84afc0SStefano Zampini       cmapa[cp] = globidx;
70984e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70994e84afc0SStefano Zampini       cp++;
71009566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
71019566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
71029566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
71039566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
71049566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
71059566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
71064e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71079566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
710808401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
71099566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71104e84afc0SStefano Zampini       rmapt[cp] = 2;
71114e84afc0SStefano Zampini       rmapa[cp] = p->garray;
71124e84afc0SStefano Zampini       cmapt[cp] = 2;
71134e84afc0SStefano Zampini       cmapa[cp] = globidx;
71144e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
71154e84afc0SStefano Zampini       cp++;
71164e84afc0SStefano Zampini     }
71174e84afc0SStefano Zampini     break;
71184e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
71199566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
71204e84afc0SStefano Zampini     /* P is product->B */
71219566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
71229566063dSJacob Faibussowitsch     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
71239566063dSJacob Faibussowitsch     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
71249566063dSJacob Faibussowitsch     PetscCall(MatProductSetFill(mp[cp],product->fill));
71259566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
71269566063dSJacob Faibussowitsch     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
71279566063dSJacob Faibussowitsch     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
71284e84afc0SStefano Zampini     mp[cp]->product->api_user = product->api_user;
71299566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(mp[cp]));
713008401ef6SPierre Jolivet     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
71319566063dSJacob Faibussowitsch     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71329566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(glob,&globidx));
71334e84afc0SStefano Zampini     rmapt[cp] = 2;
71344e84afc0SStefano Zampini     rmapa[cp] = globidx;
71354e84afc0SStefano Zampini     cmapt[cp] = 2;
71364e84afc0SStefano Zampini     cmapa[cp] = globidx;
71374e84afc0SStefano Zampini     mptmp[cp] = PETSC_FALSE;
71384e84afc0SStefano Zampini     cp++;
71394e84afc0SStefano Zampini     if (mmdata->P_oth) {
71409566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
71419566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
71429566063dSJacob Faibussowitsch       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
71439566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
71449566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
71459566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
71469566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
71479566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
71489566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
71499566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
71504e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71519566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
715208401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
71539566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71544e84afc0SStefano Zampini       mptmp[cp] = PETSC_TRUE;
71554e84afc0SStefano Zampini       cp++;
71569566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
71579566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
71589566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp],product->fill));
71599566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
71609566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
71619566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
71624e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71639566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
716408401ef6SPierre Jolivet       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
71659566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71664e84afc0SStefano Zampini       rmapt[cp] = 2;
71674e84afc0SStefano Zampini       rmapa[cp] = globidx;
71684e84afc0SStefano Zampini       cmapt[cp] = 2;
71694e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
71704e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
71714e84afc0SStefano Zampini       cp++;
71724e84afc0SStefano Zampini     }
71734e84afc0SStefano Zampini     break;
71744e84afc0SStefano Zampini   default:
717598921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
71764e84afc0SStefano Zampini   }
71774e84afc0SStefano Zampini   /* sanity check */
7178*aed4548fSBarry Smith   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
71794e84afc0SStefano Zampini 
71809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7181ddea5d60SJunchao Zhang   for (i = 0; i < cp; i++) {
7182ddea5d60SJunchao Zhang     mmdata->mp[i]    = mp[i];
7183ddea5d60SJunchao Zhang     mmdata->mptmp[i] = mptmp[i];
7184ddea5d60SJunchao Zhang   }
71854e84afc0SStefano Zampini   mmdata->cp = cp;
71864e84afc0SStefano Zampini   C->product->data       = mmdata;
71874e84afc0SStefano Zampini   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
71884e84afc0SStefano Zampini   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
71894e84afc0SStefano Zampini 
7190c215019aSStefano Zampini   /* memory type */
7191c215019aSStefano Zampini   mmdata->mtype = PETSC_MEMTYPE_HOST;
71929566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
71939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7194c215019aSStefano Zampini   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
71953214990dSStefano Zampini   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7196c215019aSStefano Zampini 
71974e84afc0SStefano Zampini   /* prepare coo coordinates for values insertion */
7198ddea5d60SJunchao Zhang 
7199ddea5d60SJunchao Zhang   /* count total nonzeros of those intermediate seqaij Mats
7200ddea5d60SJunchao Zhang     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7201ddea5d60SJunchao Zhang     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7202ddea5d60SJunchao Zhang     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7203ddea5d60SJunchao Zhang   */
72044e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
72054e84afc0SStefano Zampini     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
72064e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7207ddea5d60SJunchao Zhang     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
72084e84afc0SStefano Zampini       const PetscInt *rmap = rmapa[cp];
72094e84afc0SStefano Zampini       const PetscInt mr = mp[cp]->rmap->n;
72104e84afc0SStefano Zampini       const PetscInt rs = C->rmap->rstart;
72114e84afc0SStefano Zampini       const PetscInt re = C->rmap->rend;
72124e84afc0SStefano Zampini       const PetscInt *ii  = mm->i;
72134e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
72144e84afc0SStefano Zampini         const PetscInt gr = rmap[i];
72154e84afc0SStefano Zampini         const PetscInt nz = ii[i+1] - ii[i];
7216ddea5d60SJunchao Zhang         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7217ddea5d60SJunchao Zhang         else ncoo_oown += nz; /* this row is local */
72184e84afc0SStefano Zampini       }
72194e84afc0SStefano Zampini     } else ncoo_d += mm->nz;
72204e84afc0SStefano Zampini   }
7221ddea5d60SJunchao Zhang 
7222ddea5d60SJunchao Zhang   /*
7223ddea5d60SJunchao Zhang     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7224ddea5d60SJunchao Zhang 
7225ddea5d60SJunchao Zhang     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7226ddea5d60SJunchao Zhang 
7227ddea5d60SJunchao Zhang     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7228ddea5d60SJunchao Zhang 
7229ddea5d60SJunchao Zhang     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7230ddea5d60SJunchao Zhang     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7231ddea5d60SJunchao Zhang     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7232ddea5d60SJunchao Zhang 
7233ddea5d60SJunchao Zhang     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7234ddea5d60SJunchao Zhang     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7235ddea5d60SJunchao Zhang   */
72369566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
72379566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7238ddea5d60SJunchao Zhang 
7239ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted by remote procs */
7240ddea5d60SJunchao Zhang   if (hasoffproc) {
72414e84afc0SStefano Zampini     PetscSF  msf;
72424e84afc0SStefano Zampini     PetscInt ncoo2,*coo_i2,*coo_j2;
72434e84afc0SStefano Zampini 
72449566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
72459566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
72469566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7247ddea5d60SJunchao Zhang 
72484e84afc0SStefano Zampini     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
72494e84afc0SStefano Zampini       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
72504e84afc0SStefano Zampini       PetscInt   *idxoff = mmdata->off[cp];
72514e84afc0SStefano Zampini       PetscInt   *idxown = mmdata->own[cp];
7252ddea5d60SJunchao Zhang       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
72534e84afc0SStefano Zampini         const PetscInt *rmap = rmapa[cp];
72544e84afc0SStefano Zampini         const PetscInt *cmap = cmapa[cp];
72554e84afc0SStefano Zampini         const PetscInt *ii  = mm->i;
72564e84afc0SStefano Zampini         PetscInt       *coi = coo_i + ncoo_o;
72574e84afc0SStefano Zampini         PetscInt       *coj = coo_j + ncoo_o;
72584e84afc0SStefano Zampini         const PetscInt mr = mp[cp]->rmap->n;
72594e84afc0SStefano Zampini         const PetscInt rs = C->rmap->rstart;
72604e84afc0SStefano Zampini         const PetscInt re = C->rmap->rend;
72614e84afc0SStefano Zampini         const PetscInt cs = C->cmap->rstart;
72624e84afc0SStefano Zampini         for (i = 0; i < mr; i++) {
72634e84afc0SStefano Zampini           const PetscInt *jj = mm->j + ii[i];
72644e84afc0SStefano Zampini           const PetscInt gr  = rmap[i];
72654e84afc0SStefano Zampini           const PetscInt nz  = ii[i+1] - ii[i];
7266ddea5d60SJunchao Zhang           if (gr < rs || gr >= re) { /* this is an offproc row */
72674e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i+1]; j++) {
72684e84afc0SStefano Zampini               *coi++ = gr;
72694e84afc0SStefano Zampini               *idxoff++ = j;
72704e84afc0SStefano Zampini             }
72714e84afc0SStefano Zampini             if (!cmapt[cp]) { /* already global */
72724e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j];
72734e84afc0SStefano Zampini             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
72744e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
72754e84afc0SStefano Zampini             } else { /* offdiag */
72764e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
72774e84afc0SStefano Zampini             }
72784e84afc0SStefano Zampini             ncoo_o += nz;
7279ddea5d60SJunchao Zhang           } else { /* this is a local row */
72804e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
72814e84afc0SStefano Zampini           }
72824e84afc0SStefano Zampini         }
72834e84afc0SStefano Zampini       }
72844e84afc0SStefano Zampini       mmdata->off[cp + 1] = idxoff;
72854e84afc0SStefano Zampini       mmdata->own[cp + 1] = idxown;
72864e84afc0SStefano Zampini     }
72874e84afc0SStefano Zampini 
72889566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
72899566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
72909566063dSJacob Faibussowitsch     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
72919566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
72924e84afc0SStefano Zampini     ncoo = ncoo_d + ncoo_oown + ncoo2;
72939566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
72949566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
72959566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
72969566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
72979566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
72989566063dSJacob Faibussowitsch     PetscCall(PetscFree2(coo_i,coo_j));
7299ddea5d60SJunchao Zhang     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
73009566063dSJacob Faibussowitsch     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
73014e84afc0SStefano Zampini     coo_i = coo_i2;
73024e84afc0SStefano Zampini     coo_j = coo_j2;
73034e84afc0SStefano Zampini   } else { /* no offproc values insertion */
73044e84afc0SStefano Zampini     ncoo = ncoo_d;
73059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7306c215019aSStefano Zampini 
73079566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
73089566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
73099566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(mmdata->sf));
73104e84afc0SStefano Zampini   }
7311c215019aSStefano Zampini   mmdata->hasoffproc = hasoffproc;
73124e84afc0SStefano Zampini 
7313ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted locally */
73144e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
73154e84afc0SStefano Zampini     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
73164e84afc0SStefano Zampini     PetscInt       *coi = coo_i + ncoo_d;
73174e84afc0SStefano Zampini     PetscInt       *coj = coo_j + ncoo_d;
73184e84afc0SStefano Zampini     const PetscInt *jj  = mm->j;
73194e84afc0SStefano Zampini     const PetscInt *ii  = mm->i;
73204e84afc0SStefano Zampini     const PetscInt *cmap = cmapa[cp];
73214e84afc0SStefano Zampini     const PetscInt *rmap = rmapa[cp];
73224e84afc0SStefano Zampini     const PetscInt mr = mp[cp]->rmap->n;
73234e84afc0SStefano Zampini     const PetscInt rs = C->rmap->rstart;
73244e84afc0SStefano Zampini     const PetscInt re = C->rmap->rend;
73254e84afc0SStefano Zampini     const PetscInt cs = C->cmap->rstart;
73264e84afc0SStefano Zampini 
73274e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7328ddea5d60SJunchao Zhang     if (rmapt[cp] == 1) { /* consecutive rows */
7329ddea5d60SJunchao Zhang       /* fill coo_i */
73304e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
73314e84afc0SStefano Zampini         const PetscInt gr = i + rs;
73324e84afc0SStefano Zampini         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
73334e84afc0SStefano Zampini       }
7334ddea5d60SJunchao Zhang       /* fill coo_j */
7335ddea5d60SJunchao Zhang       if (!cmapt[cp]) { /* type-0, already global */
73369566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7337ddea5d60SJunchao Zhang       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7338ddea5d60SJunchao Zhang         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7339ddea5d60SJunchao Zhang       } else { /* type-2, local to global for sparse columns */
73404e84afc0SStefano Zampini         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
73414e84afc0SStefano Zampini       }
73424e84afc0SStefano Zampini       ncoo_d += mm->nz;
7343ddea5d60SJunchao Zhang     } else if (rmapt[cp] == 2) { /* sparse rows */
73444e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
73454e84afc0SStefano Zampini         const PetscInt *jj = mm->j + ii[i];
73464e84afc0SStefano Zampini         const PetscInt gr  = rmap[i];
73474e84afc0SStefano Zampini         const PetscInt nz  = ii[i+1] - ii[i];
7348ddea5d60SJunchao Zhang         if (gr >= rs && gr < re) { /* local rows */
73494e84afc0SStefano Zampini           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7350ddea5d60SJunchao Zhang           if (!cmapt[cp]) { /* type-0, already global */
73514e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j];
73524e84afc0SStefano Zampini           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
73534e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7354ddea5d60SJunchao Zhang           } else { /* type-2, local to global for sparse columns */
73554e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
73564e84afc0SStefano Zampini           }
73574e84afc0SStefano Zampini           ncoo_d += nz;
73584e84afc0SStefano Zampini         }
73594e84afc0SStefano Zampini       }
73604e84afc0SStefano Zampini     }
73614e84afc0SStefano Zampini   }
73624e84afc0SStefano Zampini   if (glob) {
73639566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(glob,&globidx));
73644e84afc0SStefano Zampini   }
73659566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&glob));
73664e84afc0SStefano Zampini   if (P_oth_l2g) {
73679566063dSJacob Faibussowitsch     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
73684e84afc0SStefano Zampini   }
73699566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7370ddea5d60SJunchao Zhang   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
73719566063dSJacob Faibussowitsch   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
73724e84afc0SStefano Zampini 
73734e84afc0SStefano Zampini   /* preallocate with COO data */
73749566063dSJacob Faibussowitsch   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
73759566063dSJacob Faibussowitsch   PetscCall(PetscFree2(coo_i,coo_j));
73764e84afc0SStefano Zampini   PetscFunctionReturn(0);
73774e84afc0SStefano Zampini }
73784e84afc0SStefano Zampini 
73794e84afc0SStefano Zampini PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
73804e84afc0SStefano Zampini {
73814e84afc0SStefano Zampini   Mat_Product *product = mat->product;
73824e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
73834e84afc0SStefano Zampini   PetscBool    match   = PETSC_FALSE;
7384abb89eb1SStefano Zampini   PetscBool    usecpu  = PETSC_FALSE;
73854e84afc0SStefano Zampini #else
73864e84afc0SStefano Zampini   PetscBool    match   = PETSC_TRUE;
73874e84afc0SStefano Zampini #endif
73884e84afc0SStefano Zampini 
73894e84afc0SStefano Zampini   PetscFunctionBegin;
73904e84afc0SStefano Zampini   MatCheckProduct(mat,1);
73914e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
73924e84afc0SStefano Zampini   if (!product->A->boundtocpu && !product->B->boundtocpu) {
73939566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
73944e84afc0SStefano Zampini   }
739565e4b4d4SStefano Zampini   if (match) { /* we can always fallback to the CPU if requested */
7396abb89eb1SStefano Zampini     switch (product->type) {
7397abb89eb1SStefano Zampini     case MATPRODUCT_AB:
7398abb89eb1SStefano Zampini       if (product->api_user) {
7399d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
74009566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7401d0609cedSBarry Smith         PetscOptionsEnd();
7402abb89eb1SStefano Zampini       } else {
7403d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
74049566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7405d0609cedSBarry Smith         PetscOptionsEnd();
7406abb89eb1SStefano Zampini       }
7407abb89eb1SStefano Zampini       break;
7408abb89eb1SStefano Zampini     case MATPRODUCT_AtB:
7409abb89eb1SStefano Zampini       if (product->api_user) {
7410d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
74119566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7412d0609cedSBarry Smith         PetscOptionsEnd();
7413abb89eb1SStefano Zampini       } else {
7414d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
74159566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7416d0609cedSBarry Smith         PetscOptionsEnd();
7417abb89eb1SStefano Zampini       }
7418abb89eb1SStefano Zampini       break;
7419abb89eb1SStefano Zampini     case MATPRODUCT_PtAP:
7420abb89eb1SStefano Zampini       if (product->api_user) {
7421d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
74229566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7423d0609cedSBarry Smith         PetscOptionsEnd();
7424abb89eb1SStefano Zampini       } else {
7425d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
74269566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7427d0609cedSBarry Smith         PetscOptionsEnd();
7428abb89eb1SStefano Zampini       }
7429abb89eb1SStefano Zampini       break;
7430abb89eb1SStefano Zampini     default:
7431abb89eb1SStefano Zampini       break;
7432abb89eb1SStefano Zampini     }
7433abb89eb1SStefano Zampini     match = (PetscBool)!usecpu;
7434abb89eb1SStefano Zampini   }
74354e84afc0SStefano Zampini #endif
74364e84afc0SStefano Zampini   if (match) {
74374e84afc0SStefano Zampini     switch (product->type) {
74384e84afc0SStefano Zampini     case MATPRODUCT_AB:
74394e84afc0SStefano Zampini     case MATPRODUCT_AtB:
74404e84afc0SStefano Zampini     case MATPRODUCT_PtAP:
74414e84afc0SStefano Zampini       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
74424e84afc0SStefano Zampini       break;
74434e84afc0SStefano Zampini     default:
74444e84afc0SStefano Zampini       break;
74454e84afc0SStefano Zampini     }
74464e84afc0SStefano Zampini   }
74474e84afc0SStefano Zampini   /* fallback to MPIAIJ ops */
74489566063dSJacob Faibussowitsch   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
74494e84afc0SStefano Zampini   PetscFunctionReturn(0);
745081824310SBarry Smith }
745198921bdaSJacob Faibussowitsch 
745298921bdaSJacob Faibussowitsch /*
745398921bdaSJacob Faibussowitsch     Special version for direct calls from Fortran
745498921bdaSJacob Faibussowitsch */
745598921bdaSJacob Faibussowitsch #include <petsc/private/fortranimpl.h>
745698921bdaSJacob Faibussowitsch 
/* Redefine the error-handling macros so that they can be used inside a function returning void */
/* Same as PetscCallVoid, except that the error code returned by PetscError() is also stored in *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return; \
    } \
  } while (0)
746798921bdaSJacob Faibussowitsch 
/* SETERRQ for use in a function returning void: store the result of PetscError() in *_ierr, then return without a value */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) \
  do { \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return; \
  } while (0)
747398921bdaSJacob Faibussowitsch 
/* Pick the external symbol name of the Fortran stub: upper case, lower case without a trailing underscore,
   or (when neither branch applies) the name matsetvaluesmpiaij_ as written */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
748098921bdaSJacob Faibussowitsch PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
748198921bdaSJacob Faibussowitsch {
748298921bdaSJacob Faibussowitsch   Mat          mat  = *mmat;
748398921bdaSJacob Faibussowitsch   PetscInt     m    = *mm, n = *mn;
748498921bdaSJacob Faibussowitsch   InsertMode   addv = *maddv;
748598921bdaSJacob Faibussowitsch   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
748698921bdaSJacob Faibussowitsch   PetscScalar  value;
748798921bdaSJacob Faibussowitsch 
748898921bdaSJacob Faibussowitsch   MatCheckPreallocated(mat,1);
748998921bdaSJacob Faibussowitsch   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
74905f80ce2aSJacob Faibussowitsch   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
749198921bdaSJacob Faibussowitsch   {
749298921bdaSJacob Faibussowitsch     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
749398921bdaSJacob Faibussowitsch     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
749498921bdaSJacob Faibussowitsch     PetscBool roworiented = aij->roworiented;
749598921bdaSJacob Faibussowitsch 
749698921bdaSJacob Faibussowitsch     /* Some Variables required in the macro */
749798921bdaSJacob Faibussowitsch     Mat        A                    = aij->A;
749898921bdaSJacob Faibussowitsch     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
749998921bdaSJacob Faibussowitsch     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
750098921bdaSJacob Faibussowitsch     MatScalar  *aa;
750198921bdaSJacob Faibussowitsch     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
750298921bdaSJacob Faibussowitsch     Mat        B                    = aij->B;
750398921bdaSJacob Faibussowitsch     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
750498921bdaSJacob Faibussowitsch     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
750598921bdaSJacob Faibussowitsch     MatScalar  *ba;
750698921bdaSJacob Faibussowitsch     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
750798921bdaSJacob Faibussowitsch      * cannot use "#if defined" inside a macro. */
750898921bdaSJacob Faibussowitsch     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
750998921bdaSJacob Faibussowitsch 
751098921bdaSJacob Faibussowitsch     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
751198921bdaSJacob Faibussowitsch     PetscInt  nonew = a->nonew;
751298921bdaSJacob Faibussowitsch     MatScalar *ap1,*ap2;
751398921bdaSJacob Faibussowitsch 
751498921bdaSJacob Faibussowitsch     PetscFunctionBegin;
75159566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(A,&aa));
75169566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(B,&ba));
751798921bdaSJacob Faibussowitsch     for (i=0; i<m; i++) {
751898921bdaSJacob Faibussowitsch       if (im[i] < 0) continue;
75196bdcaf15SBarry Smith       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
752098921bdaSJacob Faibussowitsch       if (im[i] >= rstart && im[i] < rend) {
752198921bdaSJacob Faibussowitsch         row      = im[i] - rstart;
752298921bdaSJacob Faibussowitsch         lastcol1 = -1;
752398921bdaSJacob Faibussowitsch         rp1      = aj + ai[row];
752498921bdaSJacob Faibussowitsch         ap1      = aa + ai[row];
752598921bdaSJacob Faibussowitsch         rmax1    = aimax[row];
752698921bdaSJacob Faibussowitsch         nrow1    = ailen[row];
752798921bdaSJacob Faibussowitsch         low1     = 0;
752898921bdaSJacob Faibussowitsch         high1    = nrow1;
752998921bdaSJacob Faibussowitsch         lastcol2 = -1;
753098921bdaSJacob Faibussowitsch         rp2      = bj + bi[row];
753198921bdaSJacob Faibussowitsch         ap2      = ba + bi[row];
753298921bdaSJacob Faibussowitsch         rmax2    = bimax[row];
753398921bdaSJacob Faibussowitsch         nrow2    = bilen[row];
753498921bdaSJacob Faibussowitsch         low2     = 0;
753598921bdaSJacob Faibussowitsch         high2    = nrow2;
753698921bdaSJacob Faibussowitsch 
753798921bdaSJacob Faibussowitsch         for (j=0; j<n; j++) {
753898921bdaSJacob Faibussowitsch           if (roworiented) value = v[i*n+j];
753998921bdaSJacob Faibussowitsch           else value = v[i+j*m];
754098921bdaSJacob Faibussowitsch           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
754198921bdaSJacob Faibussowitsch           if (in[j] >= cstart && in[j] < cend) {
754298921bdaSJacob Faibussowitsch             col = in[j] - cstart;
754398921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
754498921bdaSJacob Faibussowitsch           } else if (in[j] < 0) continue;
754598921bdaSJacob Faibussowitsch           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
754698921bdaSJacob Faibussowitsch             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
754763a3b9bcSJacob Faibussowitsch             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
754898921bdaSJacob Faibussowitsch           } else {
754998921bdaSJacob Faibussowitsch             if (mat->was_assembled) {
755098921bdaSJacob Faibussowitsch               if (!aij->colmap) {
75519566063dSJacob Faibussowitsch                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
755298921bdaSJacob Faibussowitsch               }
755398921bdaSJacob Faibussowitsch #if defined(PETSC_USE_CTABLE)
75549566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
755598921bdaSJacob Faibussowitsch               col--;
755698921bdaSJacob Faibussowitsch #else
755798921bdaSJacob Faibussowitsch               col = aij->colmap[in[j]] - 1;
755898921bdaSJacob Faibussowitsch #endif
755998921bdaSJacob Faibussowitsch               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
75609566063dSJacob Faibussowitsch                 PetscCall(MatDisAssemble_MPIAIJ(mat));
756198921bdaSJacob Faibussowitsch                 col  =  in[j];
756298921bdaSJacob Faibussowitsch                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
756398921bdaSJacob Faibussowitsch                 B        = aij->B;
756498921bdaSJacob Faibussowitsch                 b        = (Mat_SeqAIJ*)B->data;
756598921bdaSJacob Faibussowitsch                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
756698921bdaSJacob Faibussowitsch                 rp2      = bj + bi[row];
756798921bdaSJacob Faibussowitsch                 ap2      = ba + bi[row];
756898921bdaSJacob Faibussowitsch                 rmax2    = bimax[row];
756998921bdaSJacob Faibussowitsch                 nrow2    = bilen[row];
757098921bdaSJacob Faibussowitsch                 low2     = 0;
757198921bdaSJacob Faibussowitsch                 high2    = nrow2;
757298921bdaSJacob Faibussowitsch                 bm       = aij->B->rmap->n;
757398921bdaSJacob Faibussowitsch                 ba       = b->a;
757498921bdaSJacob Faibussowitsch                 inserted = PETSC_FALSE;
757598921bdaSJacob Faibussowitsch               }
757698921bdaSJacob Faibussowitsch             } else col = in[j];
757798921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
757898921bdaSJacob Faibussowitsch           }
757998921bdaSJacob Faibussowitsch         }
758098921bdaSJacob Faibussowitsch       } else if (!aij->donotstash) {
758198921bdaSJacob Faibussowitsch         if (roworiented) {
75829566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
758398921bdaSJacob Faibussowitsch         } else {
75849566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
758598921bdaSJacob Faibussowitsch         }
758698921bdaSJacob Faibussowitsch       }
758798921bdaSJacob Faibussowitsch     }
75889566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(A,&aa));
75899566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(B,&ba));
759098921bdaSJacob Faibussowitsch   }
759198921bdaSJacob Faibussowitsch   PetscFunctionReturnVoid();
759298921bdaSJacob Faibussowitsch }
759398921bdaSJacob Faibussowitsch /* Undefine PetscCall and SETERRQ here, since they were redefined from their original definitions above. No
759498921bdaSJacob Faibussowitsch  * other PETSc functions should be defined past this point, as it is impossible to recover the
759598921bdaSJacob Faibussowitsch  * original definitions; after the two #undef lines below neither macro is defined at all. */
75969566063dSJacob Faibussowitsch #undef PetscCall
759798921bdaSJacob Faibussowitsch #undef SETERRQ
7598