1be1d678aSKris Buschelman #define PETSCMAT_DLL 28a729477SBarry Smith 37c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 48a729477SBarry Smith 5dd6ea824SBarry Smith #undef __FUNCT__ 6dd6ea824SBarry Smith #define __FUNCT__ "MatDistribute_MPIAIJ" 7dd6ea824SBarry Smith /* 8dd6ea824SBarry Smith Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9dd6ea824SBarry Smith MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10dd6ea824SBarry Smith 11dd6ea824SBarry Smith Only for square matrices 12dd6ea824SBarry Smith */ 13dd6ea824SBarry Smith PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14dd6ea824SBarry Smith { 15dd6ea824SBarry Smith PetscMPIInt rank,size; 16dd6ea824SBarry Smith PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17dd6ea824SBarry Smith PetscErrorCode ierr; 18dd6ea824SBarry Smith Mat mat; 19dd6ea824SBarry Smith Mat_SeqAIJ *gmata; 20dd6ea824SBarry Smith PetscMPIInt tag; 21dd6ea824SBarry Smith MPI_Status status; 22dd6ea824SBarry Smith PetscTruth aij; 23dd6ea824SBarry Smith MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24dd6ea824SBarry Smith 25dd6ea824SBarry Smith PetscFunctionBegin; 26dd6ea824SBarry Smith CHKMEMQ; 27dd6ea824SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28dd6ea824SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29dd6ea824SBarry Smith if (!rank) { 30dd6ea824SBarry Smith ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31dd6ea824SBarry Smith if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32dd6ea824SBarry Smith } 33dd6ea824SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 34dd6ea824SBarry Smith ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35dd6ea824SBarry Smith ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36dd6ea824SBarry Smith ierr = 
MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37dd6ea824SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38dd6ea824SBarry Smith ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39dd6ea824SBarry Smith ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40dd6ea824SBarry Smith rowners[0] = 0; 41dd6ea824SBarry Smith for (i=2; i<=size; i++) { 42dd6ea824SBarry Smith rowners[i] += rowners[i-1]; 43dd6ea824SBarry Smith } 44dd6ea824SBarry Smith rstart = rowners[rank]; 45dd6ea824SBarry Smith rend = rowners[rank+1]; 46dd6ea824SBarry Smith ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47dd6ea824SBarry Smith if (!rank) { 48dd6ea824SBarry Smith gmata = (Mat_SeqAIJ*) gmat->data; 49dd6ea824SBarry Smith /* send row lengths to all processors */ 50dd6ea824SBarry Smith for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51dd6ea824SBarry Smith for (i=1; i<size; i++) { 52dd6ea824SBarry Smith ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53dd6ea824SBarry Smith } 54dd6ea824SBarry Smith /* determine number diagonal and off-diagonal counts */ 55dd6ea824SBarry Smith ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56dd6ea824SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57dd6ea824SBarry Smith ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58dd6ea824SBarry Smith jj = 0; 59dd6ea824SBarry Smith for (i=0; i<m; i++) { 60dd6ea824SBarry Smith for (j=0; j<dlens[i]; j++) { 61dd6ea824SBarry Smith if (gmata->j[jj] < rstart) ld[i]++; 62dd6ea824SBarry Smith if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63dd6ea824SBarry Smith jj++; 64dd6ea824SBarry Smith } 65dd6ea824SBarry Smith } 66dd6ea824SBarry Smith /* send column indices to other processes */ 67dd6ea824SBarry Smith for (i=1; i<size; i++) { 68dd6ea824SBarry Smith nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69dd6ea824SBarry Smith ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70dd6ea824SBarry Smith ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71dd6ea824SBarry Smith } 72dd6ea824SBarry Smith 73dd6ea824SBarry Smith /* send numerical values to other processes */ 74dd6ea824SBarry Smith for (i=1; i<size; i++) { 75dd6ea824SBarry Smith nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76dd6ea824SBarry Smith ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77dd6ea824SBarry Smith } 78dd6ea824SBarry Smith gmataa = gmata->a; 79dd6ea824SBarry Smith gmataj = gmata->j; 80dd6ea824SBarry Smith 81dd6ea824SBarry Smith } else { 82dd6ea824SBarry Smith /* receive row lengths */ 83dd6ea824SBarry Smith ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84dd6ea824SBarry Smith /* receive column indices */ 85dd6ea824SBarry Smith ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86dd6ea824SBarry Smith ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87dd6ea824SBarry Smith ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88dd6ea824SBarry Smith /* determine number diagonal and off-diagonal counts */ 89dd6ea824SBarry Smith ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90dd6ea824SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91dd6ea824SBarry Smith ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92dd6ea824SBarry Smith jj = 0; 93dd6ea824SBarry Smith for (i=0; i<m; i++) { 94dd6ea824SBarry Smith for (j=0; j<dlens[i]; j++) { 95dd6ea824SBarry Smith if (gmataj[jj] < rstart) ld[i]++; 96dd6ea824SBarry Smith if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97dd6ea824SBarry Smith jj++; 98dd6ea824SBarry Smith } 99dd6ea824SBarry Smith } 100dd6ea824SBarry Smith /* receive numerical values */ 101dd6ea824SBarry Smith ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102dd6ea824SBarry Smith ierr = 
MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103dd6ea824SBarry Smith } 104dd6ea824SBarry Smith /* set preallocation */ 105dd6ea824SBarry Smith for (i=0; i<m; i++) { 106dd6ea824SBarry Smith dlens[i] -= olens[i]; 107dd6ea824SBarry Smith } 108dd6ea824SBarry Smith ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109dd6ea824SBarry Smith ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110dd6ea824SBarry Smith 111dd6ea824SBarry Smith for (i=0; i<m; i++) { 112dd6ea824SBarry Smith dlens[i] += olens[i]; 113dd6ea824SBarry Smith } 114dd6ea824SBarry Smith cnt = 0; 115dd6ea824SBarry Smith for (i=0; i<m; i++) { 116dd6ea824SBarry Smith row = rstart + i; 117dd6ea824SBarry Smith ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118dd6ea824SBarry Smith cnt += dlens[i]; 119dd6ea824SBarry Smith } 120dd6ea824SBarry Smith if (rank) { 121dd6ea824SBarry Smith ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122dd6ea824SBarry Smith } 123dd6ea824SBarry Smith ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124dd6ea824SBarry Smith ierr = PetscFree(rowners);CHKERRQ(ierr); 125dd6ea824SBarry Smith ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126dd6ea824SBarry Smith *inmat = mat; 127dd6ea824SBarry Smith } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128dd6ea824SBarry Smith Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129dd6ea824SBarry Smith Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130dd6ea824SBarry Smith mat = *inmat; 131dd6ea824SBarry Smith ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132dd6ea824SBarry Smith if (!rank) { 133dd6ea824SBarry Smith /* send numerical values to other processes */ 134dd6ea824SBarry Smith gmata = (Mat_SeqAIJ*) gmat->data; 135dd6ea824SBarry Smith ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136dd6ea824SBarry Smith gmataa = 
gmata->a; 137dd6ea824SBarry Smith for (i=1; i<size; i++) { 138dd6ea824SBarry Smith nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139dd6ea824SBarry Smith ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140dd6ea824SBarry Smith } 141dd6ea824SBarry Smith nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142dd6ea824SBarry Smith } else { 143dd6ea824SBarry Smith /* receive numerical values from process 0*/ 144dd6ea824SBarry Smith nz = Ad->nz + Ao->nz; 145dd6ea824SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146dd6ea824SBarry Smith ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147dd6ea824SBarry Smith } 148dd6ea824SBarry Smith /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149dd6ea824SBarry Smith ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150dd6ea824SBarry Smith ad = Ad->a; 151dd6ea824SBarry Smith ao = Ao->a; 152d0f46423SBarry Smith if (mat->rmap->n) { 153dd6ea824SBarry Smith i = 0; 154dd6ea824SBarry Smith nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155dd6ea824SBarry Smith nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156dd6ea824SBarry Smith } 157d0f46423SBarry Smith for (i=1; i<mat->rmap->n; i++) { 158dd6ea824SBarry Smith nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159dd6ea824SBarry Smith nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160dd6ea824SBarry Smith } 161dd6ea824SBarry Smith i--; 162d0f46423SBarry Smith if (mat->rmap->n) { 163dd6ea824SBarry Smith nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164dd6ea824SBarry Smith } 165dd6ea824SBarry 
Smith if (rank) { 166dd6ea824SBarry Smith ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167dd6ea824SBarry Smith } 168dd6ea824SBarry Smith } 169dd6ea824SBarry Smith ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170dd6ea824SBarry Smith ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171dd6ea824SBarry Smith CHKMEMQ; 172dd6ea824SBarry Smith PetscFunctionReturn(0); 173dd6ea824SBarry Smith } 174dd6ea824SBarry Smith 1750f5bd95cSBarry Smith /* 1760f5bd95cSBarry Smith Local utility routine that creates a mapping from the global column 1779e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 1780f5bd95cSBarry Smith storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 1790f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor 1800f5bd95cSBarry Smith has an order N integer array but is fast to acess. 1819e25ed09SBarry Smith */ 1824a2ae208SSatish Balay #undef __FUNCT__ 1834a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 1859e25ed09SBarry Smith { 18644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1876849ba73SBarry Smith PetscErrorCode ierr; 188d0f46423SBarry Smith PetscInt n = aij->B->cmap->n,i; 189dbb450caSBarry Smith 1903a40ed3dSBarry Smith PetscFunctionBegin; 191aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 192273d9f13SBarry Smith ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193b1fc9764SSatish Balay for (i=0; i<n; i++){ 1940f5bd95cSBarry Smith ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195b1fc9764SSatish Balay } 196b1fc9764SSatish Balay #else 197d0f46423SBarry Smith ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198d0f46423SBarry Smith ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199d0f46423SBarry Smith ierr = 
PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200905e6a2fSBarry Smith for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201b1fc9764SSatish Balay #endif 2023a40ed3dSBarry Smith PetscFunctionReturn(0); 2039e25ed09SBarry Smith } 2049e25ed09SBarry Smith 205085a36d4SBarry Smith 2060520107fSSatish Balay #define CHUNKSIZE 15 20730770e4dSSatish Balay #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 2080520107fSSatish Balay { \ 2097cd84e04SBarry Smith if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210fd3458f5SBarry Smith lastcol1 = col;\ 211fd3458f5SBarry Smith while (high1-low1 > 5) { \ 212fd3458f5SBarry Smith t = (low1+high1)/2; \ 213fd3458f5SBarry Smith if (rp1[t] > col) high1 = t; \ 214fd3458f5SBarry Smith else low1 = t; \ 215ba4e3ef2SSatish Balay } \ 216fd3458f5SBarry Smith for (_i=low1; _i<high1; _i++) { \ 217fd3458f5SBarry Smith if (rp1[_i] > col) break; \ 218fd3458f5SBarry Smith if (rp1[_i] == col) { \ 219fd3458f5SBarry Smith if (addv == ADD_VALUES) ap1[_i] += value; \ 220fd3458f5SBarry Smith else ap1[_i] = value; \ 22130770e4dSSatish Balay goto a_noinsert; \ 2220520107fSSatish Balay } \ 2230520107fSSatish Balay } \ 224e44c0bd4SBarry Smith if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225e44c0bd4SBarry Smith if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228669a8dbcSSatish Balay N = nrow1++ - 1; a->nz++; high1++; \ 2290520107fSSatish Balay /* shift up all the later entries in this row */ \ 2300520107fSSatish Balay for (ii=N; ii>=_i; ii--) { \ 231fd3458f5SBarry Smith rp1[ii+1] = rp1[ii]; \ 232fd3458f5SBarry Smith ap1[ii+1] = ap1[ii]; \ 2330520107fSSatish Balay } \ 234fd3458f5SBarry Smith rp1[_i] = col; \ 
235fd3458f5SBarry Smith ap1[_i] = value; \ 23630770e4dSSatish Balay a_noinsert: ; \ 237fd3458f5SBarry Smith ailen[row] = nrow1; \ 2380520107fSSatish Balay } 2390a198c4cSBarry Smith 240085a36d4SBarry Smith 24130770e4dSSatish Balay #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 24230770e4dSSatish Balay { \ 2437cd84e04SBarry Smith if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244fd3458f5SBarry Smith lastcol2 = col;\ 245fd3458f5SBarry Smith while (high2-low2 > 5) { \ 246fd3458f5SBarry Smith t = (low2+high2)/2; \ 247fd3458f5SBarry Smith if (rp2[t] > col) high2 = t; \ 248fd3458f5SBarry Smith else low2 = t; \ 249ba4e3ef2SSatish Balay } \ 250fd3458f5SBarry Smith for (_i=low2; _i<high2; _i++) { \ 251fd3458f5SBarry Smith if (rp2[_i] > col) break; \ 252fd3458f5SBarry Smith if (rp2[_i] == col) { \ 253fd3458f5SBarry Smith if (addv == ADD_VALUES) ap2[_i] += value; \ 254fd3458f5SBarry Smith else ap2[_i] = value; \ 25530770e4dSSatish Balay goto b_noinsert; \ 25630770e4dSSatish Balay } \ 25730770e4dSSatish Balay } \ 258e44c0bd4SBarry Smith if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259e44c0bd4SBarry Smith if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262669a8dbcSSatish Balay N = nrow2++ - 1; b->nz++; high2++; \ 26330770e4dSSatish Balay /* shift up all the later entries in this row */ \ 26430770e4dSSatish Balay for (ii=N; ii>=_i; ii--) { \ 265fd3458f5SBarry Smith rp2[ii+1] = rp2[ii]; \ 266fd3458f5SBarry Smith ap2[ii+1] = ap2[ii]; \ 26730770e4dSSatish Balay } \ 268fd3458f5SBarry Smith rp2[_i] = col; \ 269fd3458f5SBarry Smith ap2[_i] = value; \ 27030770e4dSSatish Balay b_noinsert: ; \ 271fd3458f5SBarry Smith bilen[row] = nrow2; \ 27230770e4dSSatish Balay } 
27330770e4dSSatish Balay 2744a2ae208SSatish Balay #undef __FUNCT__ 2752fd7e33dSBarry Smith #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 2762fd7e33dSBarry Smith PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 2772fd7e33dSBarry Smith { 2782fd7e33dSBarry Smith Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2792fd7e33dSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 2802fd7e33dSBarry Smith PetscErrorCode ierr; 2812fd7e33dSBarry Smith PetscInt l,*garray = mat->garray,diag; 2822fd7e33dSBarry Smith 2832fd7e33dSBarry Smith PetscFunctionBegin; 2842fd7e33dSBarry Smith /* code only works for square matrices A */ 2852fd7e33dSBarry Smith 2862fd7e33dSBarry Smith /* find size of row to the left of the diagonal part */ 2872fd7e33dSBarry Smith ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 2882fd7e33dSBarry Smith row = row - diag; 2892fd7e33dSBarry Smith for (l=0; l<b->i[row+1]-b->i[row]; l++) { 2902fd7e33dSBarry Smith if (garray[b->j[b->i[row]+l]] > diag) break; 2912fd7e33dSBarry Smith } 2922fd7e33dSBarry Smith ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 2932fd7e33dSBarry Smith 2942fd7e33dSBarry Smith /* diagonal part */ 2952fd7e33dSBarry Smith ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 2962fd7e33dSBarry Smith 2972fd7e33dSBarry Smith /* right of diagonal part */ 2982fd7e33dSBarry Smith ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 2992fd7e33dSBarry Smith PetscFunctionReturn(0); 3002fd7e33dSBarry Smith } 3012fd7e33dSBarry Smith 3022fd7e33dSBarry Smith #undef __FUNCT__ 3034a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIAIJ" 304b1d57f15SBarry Smith PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 3058a729477SBarry Smith { 30644a69424SLois Curfman McInnes 
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 30787828ca2SBarry Smith PetscScalar value; 308dfbe8321SBarry Smith PetscErrorCode ierr; 309d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311273d9f13SBarry Smith PetscTruth roworiented = aij->roworiented; 3128a729477SBarry Smith 3130520107fSSatish Balay /* Some Variables required in the macro */ 3144ee7247eSSatish Balay Mat A = aij->A; 3154ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 31657809a77SBarry Smith PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317a77337e4SBarry Smith MatScalar *aa = a->a; 318edb03aefSBarry Smith PetscTruth ignorezeroentries = a->ignorezeroentries; 31930770e4dSSatish Balay Mat B = aij->B; 32030770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321d0f46423SBarry Smith PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322a77337e4SBarry Smith MatScalar *ba = b->a; 32330770e4dSSatish Balay 324fd3458f5SBarry Smith PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325fd3458f5SBarry Smith PetscInt nonew = a->nonew; 326a77337e4SBarry Smith MatScalar *ap1,*ap2; 3274ee7247eSSatish Balay 3283a40ed3dSBarry Smith PetscFunctionBegin; 3298a729477SBarry Smith for (i=0; i<m; i++) { 3305ef9f2a5SBarry Smith if (im[i] < 0) continue; 3312515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 332d0f46423SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 3330a198c4cSBarry Smith #endif 3344b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 3354b0e389bSBarry Smith row = im[i] - rstart; 336fd3458f5SBarry Smith lastcol1 = -1; 337fd3458f5SBarry Smith rp1 = aj + ai[row]; 338fd3458f5SBarry Smith ap1 = aa + ai[row]; 339fd3458f5SBarry Smith rmax1 = aimax[row]; 340fd3458f5SBarry Smith 
nrow1 = ailen[row]; 341fd3458f5SBarry Smith low1 = 0; 342fd3458f5SBarry Smith high1 = nrow1; 343fd3458f5SBarry Smith lastcol2 = -1; 344fd3458f5SBarry Smith rp2 = bj + bi[row]; 345d498b1e9SBarry Smith ap2 = ba + bi[row]; 346fd3458f5SBarry Smith rmax2 = bimax[row]; 347d498b1e9SBarry Smith nrow2 = bilen[row]; 348fd3458f5SBarry Smith low2 = 0; 349fd3458f5SBarry Smith high2 = nrow2; 350fd3458f5SBarry Smith 3511eb62cbbSBarry Smith for (j=0; j<n; j++) { 35216371a99SBarry Smith if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353abc0a331SBarry Smith if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354fd3458f5SBarry Smith if (in[j] >= cstart && in[j] < cend){ 355fd3458f5SBarry Smith col = in[j] - cstart; 35630770e4dSSatish Balay MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357273d9f13SBarry Smith } else if (in[j] < 0) continue; 3582515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 359d0f46423SBarry Smith else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 3600a198c4cSBarry Smith #endif 3611eb62cbbSBarry Smith else { 362227d817aSBarry Smith if (mat->was_assembled) { 363905e6a2fSBarry Smith if (!aij->colmap) { 364905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365905e6a2fSBarry Smith } 366aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3670f5bd95cSBarry Smith ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368fa46199cSSatish Balay col--; 369b1fc9764SSatish Balay #else 370905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 371b1fc9764SSatish Balay #endif 372ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 3732493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 3744b0e389bSBarry Smith col = in[j]; 3759bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376f9508a3cSSatish Balay B = aij->B; 377f9508a3cSSatish 
Balay b = (Mat_SeqAIJ*)B->data; 378e44c0bd4SBarry Smith bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379d498b1e9SBarry Smith rp2 = bj + bi[row]; 380d498b1e9SBarry Smith ap2 = ba + bi[row]; 381d498b1e9SBarry Smith rmax2 = bimax[row]; 382d498b1e9SBarry Smith nrow2 = bilen[row]; 383d498b1e9SBarry Smith low2 = 0; 384d498b1e9SBarry Smith high2 = nrow2; 385d0f46423SBarry Smith bm = aij->B->rmap->n; 386f9508a3cSSatish Balay ba = b->a; 387d6dfbf8fSBarry Smith } 388c48de900SBarry Smith } else col = in[j]; 38930770e4dSSatish Balay MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 3901eb62cbbSBarry Smith } 3911eb62cbbSBarry Smith } 3925ef9f2a5SBarry Smith } else { 39390f02eecSBarry Smith if (!aij->donotstash) { 394d36fbae8SSatish Balay if (roworiented) { 3953b024144SHong Zhang ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396d36fbae8SSatish Balay } else { 3973b024144SHong Zhang ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 3984b0e389bSBarry Smith } 3991eb62cbbSBarry Smith } 4008a729477SBarry Smith } 40190f02eecSBarry Smith } 4023a40ed3dSBarry Smith PetscFunctionReturn(0); 4038a729477SBarry Smith } 4048a729477SBarry Smith 4054a2ae208SSatish Balay #undef __FUNCT__ 4064a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIAIJ" 407b1d57f15SBarry Smith PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408b49de8d1SLois Curfman McInnes { 409b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410dfbe8321SBarry Smith PetscErrorCode ierr; 411d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413b49de8d1SLois Curfman McInnes 4143a40ed3dSBarry Smith PetscFunctionBegin; 
415b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 41697e567efSBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417d0f46423SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 419b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 420b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 42197e567efSBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422d0f46423SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 424b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 425b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426fa852ad4SSatish Balay } else { 427905e6a2fSBarry Smith if (!aij->colmap) { 428905e6a2fSBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429905e6a2fSBarry Smith } 430aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4310f5bd95cSBarry Smith ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432fa46199cSSatish Balay col --; 433b1fc9764SSatish Balay #else 434905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 435b1fc9764SSatish Balay #endif 436e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437d9d09a02SSatish Balay else { 438b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439b49de8d1SLois Curfman McInnes } 440b49de8d1SLois Curfman McInnes } 441b49de8d1SLois Curfman McInnes } 442a8c6a408SBarry Smith } else { 44329bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 444b49de8d1SLois Curfman McInnes } 445b49de8d1SLois Curfman McInnes } 
4463a40ed3dSBarry Smith PetscFunctionReturn(0); 447b49de8d1SLois Curfman McInnes } 448bc5ccf88SSatish Balay 449bd0c2dcbSBarry Smith extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450bd0c2dcbSBarry Smith 4514a2ae208SSatish Balay #undef __FUNCT__ 4524a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454bc5ccf88SSatish Balay { 455bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456dfbe8321SBarry Smith PetscErrorCode ierr; 457b1d57f15SBarry Smith PetscInt nstash,reallocs; 458bc5ccf88SSatish Balay InsertMode addv; 459bc5ccf88SSatish Balay 460bc5ccf88SSatish Balay PetscFunctionBegin; 461bc5ccf88SSatish Balay if (aij->donotstash) { 462bc5ccf88SSatish Balay PetscFunctionReturn(0); 463bc5ccf88SSatish Balay } 464bc5ccf88SSatish Balay 465bc5ccf88SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 4667adad957SLisandro Dalcin ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 467bc5ccf88SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 46829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469bc5ccf88SSatish Balay } 470bc5ccf88SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 471bc5ccf88SSatish Balay 472d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 4738798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 474ae15b995SBarry Smith ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 475bc5ccf88SSatish Balay PetscFunctionReturn(0); 476bc5ccf88SSatish Balay } 477bc5ccf88SSatish Balay 4784a2ae208SSatish Balay #undef __FUNCT__ 4794a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 480dfbe8321SBarry Smith PetscErrorCode 
MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 481bc5ccf88SSatish Balay { 482bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 48391c97fd4SSatish Balay Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 4846849ba73SBarry Smith PetscErrorCode ierr; 485b1d57f15SBarry Smith PetscMPIInt n; 486b1d57f15SBarry Smith PetscInt i,j,rstart,ncols,flg; 487e44c0bd4SBarry Smith PetscInt *row,*col; 488e44c0bd4SBarry Smith PetscTruth other_disassembled; 48987828ca2SBarry Smith PetscScalar *val; 490bc5ccf88SSatish Balay InsertMode addv = mat->insertmode; 491bc5ccf88SSatish Balay 49291c97fd4SSatish Balay /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 493bc5ccf88SSatish Balay PetscFunctionBegin; 494bc5ccf88SSatish Balay if (!aij->donotstash) { 495a2d1c673SSatish Balay while (1) { 4968798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 497a2d1c673SSatish Balay if (!flg) break; 498a2d1c673SSatish Balay 499bc5ccf88SSatish Balay for (i=0; i<n;) { 500bc5ccf88SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 501bc5ccf88SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 502bc5ccf88SSatish Balay if (j < n) ncols = j-i; 503bc5ccf88SSatish Balay else ncols = n-i; 504bc5ccf88SSatish Balay /* Now assemble all these values with a single function call */ 505bc5ccf88SSatish Balay ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 506bc5ccf88SSatish Balay i = j; 507bc5ccf88SSatish Balay } 508bc5ccf88SSatish Balay } 5098798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 510bc5ccf88SSatish Balay } 5112f53aa61SHong Zhang a->compressedrow.use = PETSC_FALSE; 512bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 513bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 514bc5ccf88SSatish Balay 515bc5ccf88SSatish Balay /* determine if any processor 
has disassembled, if so we must 516bc5ccf88SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 517bc5ccf88SSatish Balay /* 518bc5ccf88SSatish Balay if nonzero structure of submatrix B cannot change then we know that 519bc5ccf88SSatish Balay no processor disassembled thus we can skip this stuff 520bc5ccf88SSatish Balay */ 521bc5ccf88SSatish Balay if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 5227adad957SLisandro Dalcin ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 523bc5ccf88SSatish Balay if (mat->was_assembled && !other_disassembled) { 524bc5ccf88SSatish Balay ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525ad59fb31SSatish Balay } 526ad59fb31SSatish Balay } 527bc5ccf88SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 528bc5ccf88SSatish Balay ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 529bc5ccf88SSatish Balay } 5304e0d8c25SBarry Smith ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 53191c97fd4SSatish Balay ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 532bc5ccf88SSatish Balay ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 533bc5ccf88SSatish Balay ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 534bc5ccf88SSatish Balay 535606d414cSSatish Balay ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 536606d414cSSatish Balay aij->rowvalues = 0; 537a30b2313SHong Zhang 538a30b2313SHong Zhang /* used by MatAXPY() */ 53991c97fd4SSatish Balay a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 54091c97fd4SSatish Balay a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 541a30b2313SHong Zhang 542a7420bb7SBarry Smith if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 543bd0c2dcbSBarry Smith if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 544bc5ccf88SSatish Balay PetscFunctionReturn(0); 
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/*
   Zeroes every stored entry of the parallel matrix by zeroing both the
   diagonal (l->A) and off-diagonal (l->B) sequential blocks; the nonzero
   structure is preserved.
*/
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the given global rows of the parallel matrix; if diag is nonzero
   the diagonal entry of each zeroed row is set to diag.

   Each process may list rows owned by other processes, so the requested
   row indices are first routed to their owning process with nonblocking
   MPI point-to-point messages, then each process zaps its own local rows.

   Input:  A    - the MPIAIJ matrix
           N    - number of rows this process requests to zero
           rows - global indices of those rows
           diag - value to place on the diagonal of zeroed rows (0.0 = none)
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j]   = number of rows destined for process j,
     nprocs[2*j+1] = 1 iff at least one row goes to process j */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search resumes from the previous hit (j is not reset),
       which is linear-time when the rows[] list is sorted; only restart
       from process 0 when the indices go backwards */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack all row indices, bucketed by owning process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (the packing loop above advanced it) before issuing sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;   /* lens[] and source[] share one allocation */
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row indices to local indices (subtract base) */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: let the sequential MatZeroRows set diag in place */
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: insert the diagonal entries one at a time,
       which requires that new-nonzero insertion be permitted */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   y = A*x. Overlaps communication with computation: the scatter of the
   off-process entries of x into a->lvec is started, the diagonal-block
   product is computed while the messages are in flight, then the
   off-diagonal block contribution is added once the scatter completes.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Applies only the local diagonal block: xx = (diag block of A) * bb. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
"MatMultAdd_MPIAIJ" 745dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746da3a660dSBarry Smith { 747416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748dfbe8321SBarry Smith PetscErrorCode ierr; 7493a40ed3dSBarry Smith 7503a40ed3dSBarry Smith PetscFunctionBegin; 751ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 7553a40ed3dSBarry Smith PetscFunctionReturn(0); 756da3a660dSBarry Smith } 757da3a660dSBarry Smith 7584a2ae208SSatish Balay #undef __FUNCT__ 7594a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761da3a660dSBarry Smith { 762416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763dfbe8321SBarry Smith PetscErrorCode ierr; 764a5ff213dSBarry Smith PetscTruth merged; 765da3a660dSBarry Smith 7663a40ed3dSBarry Smith PetscFunctionBegin; 767a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768da3a660dSBarry Smith /* do nondiagonal part */ 7697c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770a5ff213dSBarry Smith if (!merged) { 771da3a660dSBarry Smith /* send it on its way */ 772ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773da3a660dSBarry Smith /* do local part */ 7747c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 776a5ff213dSBarry Smith /* added in yy until the next line, */ 777ca9f406cSSatish Balay ierr = 
VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778a5ff213dSBarry Smith } else { 779a5ff213dSBarry Smith /* do local part */ 780a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781a5ff213dSBarry Smith /* send it on its way */ 782ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 784ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785a5ff213dSBarry Smith } 7863a40ed3dSBarry Smith PetscFunctionReturn(0); 787da3a660dSBarry Smith } 788da3a660dSBarry Smith 789cd0d46ebSvictorle EXTERN_C_BEGIN 790cd0d46ebSvictorle #undef __FUNCT__ 7915fbd3699SBarry Smith #define __FUNCT__ "MatIsTranspose_MPIAIJ" 79213c77408SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793cd0d46ebSvictorle { 7944f423910Svictorle MPI_Comm comm; 795cd0d46ebSvictorle Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 79666501d38Svictorle Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797cd0d46ebSvictorle IS Me,Notme; 7986849ba73SBarry Smith PetscErrorCode ierr; 799b1d57f15SBarry Smith PetscInt M,N,first,last,*notme,i; 800b1d57f15SBarry Smith PetscMPIInt size; 801cd0d46ebSvictorle 802cd0d46ebSvictorle PetscFunctionBegin; 80342e5f5b4Svictorle 80442e5f5b4Svictorle /* Easy test: symmetric diagonal block */ 80566501d38Svictorle Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 8065485867bSBarry Smith ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807cd0d46ebSvictorle if (!*f) PetscFunctionReturn(0); 8084f423910Svictorle ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809b1d57f15SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810b1d57f15SBarry Smith if (size == 1) PetscFunctionReturn(0); 81142e5f5b4Svictorle 81242e5f5b4Svictorle 
/* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 813cd0d46ebSvictorle ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 814cd0d46ebSvictorle ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 815b1d57f15SBarry Smith ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 816cd0d46ebSvictorle for (i=0; i<first; i++) notme[i] = i; 817cd0d46ebSvictorle for (i=last; i<M; i++) notme[i-last+first] = i; 818268466fbSBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 819268466fbSBarry Smith ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 820268466fbSBarry Smith ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 82166501d38Svictorle Aoff = Aoffs[0]; 822268466fbSBarry Smith ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 82366501d38Svictorle Boff = Boffs[0]; 8245485867bSBarry Smith ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 82566501d38Svictorle ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 82666501d38Svictorle ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 82742e5f5b4Svictorle ierr = ISDestroy(Me);CHKERRQ(ierr); 82842e5f5b4Svictorle ierr = ISDestroy(Notme);CHKERRQ(ierr); 82942e5f5b4Svictorle 830cd0d46ebSvictorle PetscFunctionReturn(0); 831cd0d46ebSvictorle } 832cd0d46ebSvictorle EXTERN_C_END 833cd0d46ebSvictorle 8344a2ae208SSatish Balay #undef __FUNCT__ 8354a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 836dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 837da3a660dSBarry Smith { 838416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 839dfbe8321SBarry Smith PetscErrorCode ierr; 840da3a660dSBarry Smith 8413a40ed3dSBarry Smith PetscFunctionBegin; 842da3a660dSBarry Smith /* do nondiagonal part */ 8437c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 844da3a660dSBarry Smith /* send 
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
/*
   Extracts the diagonal of the matrix into v. Requires a square matrix
   whose row and column ownership ranges coincide, so that a->A really is
   the diagonal block; otherwise an error is raised.
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* A = aa*A: scales both the diagonal and off-diagonal blocks. */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by the MPIAIJ data structure: the stash, the
   two sequential blocks, the column map, garray, the local work vector
   and scatter, cached row values, and finally the struct itself; then
   detaches the type name and composed functions from the Mat object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the composed (type-specific) functions so a later type change starts clean */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer in the PETSc binary Mat
   format. Rank 0 writes the header, then gathers (via point-to-point
   MPI messages) and writes, in order: all row lengths, all global column
   indices, and all numerical values; every other rank sends its local
   pieces to rank 0. Within each local row the off-diagonal columns left
   of the diagonal block come first, then the diagonal block, then the
   remaining off-diagonal columns, so indices are emitted in ascending
   global column order.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*    A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*    B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    /* header: cookie, global rows, global cols, global nonzero count (summed below) */
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* off-diagonal entries whose global column precedes the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block entries, shifted to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* remaining off-diagonal entries (global column after the diagonal block) */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the check is on rnz but the message prints nz — confirm intended */
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  /* values are packed in exactly the same order as the column indices above */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/* Dispatches viewing of the parallel matrix to ASCII, draw, or socket
   viewers (continues beyond this chunk). */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank = aij->rank,size = aij->size;
  PetscTruth     isdraw,iascii,isbinary;
  PetscViewer    sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
10648e2fed03SBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 106532077d6dSBarry Smith if (iascii) { 1066b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1067456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10684e220ebcSLois Curfman McInnes MatInfo info; 1069923f20ffSKris Buschelman PetscTruth inodes; 1070923f20ffSKris Buschelman 10717adad957SLisandro Dalcin ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1072888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1073923f20ffSKris Buschelman ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1074923f20ffSKris Buschelman if (!inodes) { 107577431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1076d0f46423SBarry Smith rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 10776831982aSBarry Smith } else { 107877431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1079d0f46423SBarry Smith rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 10806831982aSBarry Smith } 1081888f2ed8SSatish Balay ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 108277431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1083888f2ed8SSatish Balay ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 108477431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 108607d81ca4SBarry Smith ierr = 
PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1087a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 10883a40ed3dSBarry Smith PetscFunctionReturn(0); 1089fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1090923f20ffSKris Buschelman PetscInt inodecount,inodelimit,*inodes; 1091923f20ffSKris Buschelman ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1092923f20ffSKris Buschelman if (inodes) { 1093923f20ffSKris Buschelman ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1094d38fa0fbSBarry Smith } else { 1095d38fa0fbSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1096d38fa0fbSBarry Smith } 10973a40ed3dSBarry Smith PetscFunctionReturn(0); 10984aedb280SBarry Smith } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 10994aedb280SBarry Smith PetscFunctionReturn(0); 110008480c60SBarry Smith } 11018e2fed03SBarry Smith } else if (isbinary) { 11028e2fed03SBarry Smith if (size == 1) { 11037adad957SLisandro Dalcin ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 11048e2fed03SBarry Smith ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 11058e2fed03SBarry Smith } else { 11068e2fed03SBarry Smith ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 11078e2fed03SBarry Smith } 11088e2fed03SBarry Smith PetscFunctionReturn(0); 11090f5bd95cSBarry Smith } else if (isdraw) { 1110b0a32e0cSBarry Smith PetscDraw draw; 111119bcc07fSBarry Smith PetscTruth isnull; 1112b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1113b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111419bcc07fSBarry Smith } 111519bcc07fSBarry Smith 111617699dbbSLois Curfman McInnes if 
(size == 1) { 11177adad957SLisandro Dalcin ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 111878b31e54SBarry Smith ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 11193a40ed3dSBarry Smith } else { 112095373324SBarry Smith /* assemble the entire matrix onto first processor. */ 112195373324SBarry Smith Mat A; 1122ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 1123d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1124dd6ea824SBarry Smith MatScalar *a; 11252ee70a88SLois Curfman McInnes 112632a366e4SMatthew Knepley if (mat->rmap->N > 1024) { 112790d69ab7SBarry Smith PetscTruth flg = PETSC_FALSE; 112832a366e4SMatthew Knepley 11290c235cafSBarry Smith ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 113032a366e4SMatthew Knepley if (!flg) { 113190d69ab7SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 113232a366e4SMatthew Knepley } 113332a366e4SMatthew Knepley } 11340805154bSBarry Smith 11357adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 113617699dbbSLois Curfman McInnes if (!rank) { 1137f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 11383a40ed3dSBarry Smith } else { 1139f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 114095373324SBarry Smith } 1141f204ca49SKris Buschelman /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1142f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1143f204ca49SKris Buschelman ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 114452e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1145416022c9SBarry Smith 114695373324SBarry Smith /* copy over the A part */ 
1147ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*)aij->A->data; 1148d0f46423SBarry Smith m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1149d0f46423SBarry Smith row = mat->rmap->rstart; 1150d0f46423SBarry Smith for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 115195373324SBarry Smith for (i=0; i<m; i++) { 1152416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 115395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 115495373324SBarry Smith } 11552ee70a88SLois Curfman McInnes aj = Aloc->j; 1156d0f46423SBarry Smith for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 115795373324SBarry Smith 115895373324SBarry Smith /* copy over the B part */ 1159ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*)aij->B->data; 1160d0f46423SBarry Smith m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1161d0f46423SBarry Smith row = mat->rmap->rstart; 1162b1d57f15SBarry Smith ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1163b0a32e0cSBarry Smith ct = cols; 1164bfec09a0SHong Zhang for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 116595373324SBarry Smith for (i=0; i<m; i++) { 1166416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 116795373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 116895373324SBarry Smith } 1169606d414cSSatish Balay ierr = PetscFree(ct);CHKERRQ(ierr); 11706d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11716d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117255843e3eSBarry Smith /* 117355843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1174b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 117555843e3eSBarry Smith */ 1176b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1177e03a110bSBarry Smith if (!rank) { 
11787adad957SLisandro Dalcin ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 11796831982aSBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 118095373324SBarry Smith } 1181b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 118278b31e54SBarry Smith ierr = MatDestroy(A);CHKERRQ(ierr); 118395373324SBarry Smith } 11843a40ed3dSBarry Smith PetscFunctionReturn(0); 11851eb62cbbSBarry Smith } 11861eb62cbbSBarry Smith 11874a2ae208SSatish Balay #undef __FUNCT__ 11884a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIAIJ" 1189dfbe8321SBarry Smith PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1190416022c9SBarry Smith { 1191dfbe8321SBarry Smith PetscErrorCode ierr; 119232077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 1193416022c9SBarry Smith 11943a40ed3dSBarry Smith PetscFunctionBegin; 119532077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1196fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1197fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1198b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 119932077d6dSBarry Smith if (iascii || isdraw || isbinary || issocket) { 12007b2a1423SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12015cd90555SBarry Smith } else { 120279a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1203416022c9SBarry Smith } 12043a40ed3dSBarry Smith PetscFunctionReturn(0); 1205416022c9SBarry Smith } 1206416022c9SBarry Smith 12074a2ae208SSatish Balay #undef __FUNCT__ 12084a2ae208SSatish Balay #define __FUNCT__ "MatRelax_MPIAIJ" 1209b1d57f15SBarry Smith PetscErrorCode 
MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 12108a729477SBarry Smith { 121144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1212dfbe8321SBarry Smith PetscErrorCode ierr; 12136987fefcSBarry Smith Vec bb1 = 0; 1214bd0c2dcbSBarry Smith PetscTruth hasop; 12158a729477SBarry Smith 12163a40ed3dSBarry Smith PetscFunctionBegin; 121785911e72SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 121885911e72SJed Brown ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 121985911e72SJed Brown } 12202798e883SHong Zhang 1221*a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 1222*a2b30743SBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223*a2b30743SBarry Smith PetscFunctionReturn(0); 1224*a2b30743SBarry Smith } 1225*a2b30743SBarry Smith 1226c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1227da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 122864aae45aSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 12292798e883SHong Zhang its--; 1230da3a660dSBarry Smith } 12312798e883SHong Zhang 12322798e883SHong Zhang while (its--) { 1233ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12352798e883SHong Zhang 1236c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 1237efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1238c14dc6b6SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 12392798e883SHong Zhang 1240c14dc6b6SHong Zhang /* local sweep */ 124164aae45aSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 12422798e883SHong Zhang } 
12433a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1244da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 124564aae45aSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 12462798e883SHong Zhang its--; 1247da3a660dSBarry Smith } 12482798e883SHong Zhang while (its--) { 1249ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12512798e883SHong Zhang 1252c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 1253efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1254c14dc6b6SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1255c14dc6b6SHong Zhang 1256c14dc6b6SHong Zhang /* local sweep */ 125764aae45aSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 12582798e883SHong Zhang } 12593a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1260da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 126164aae45aSBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 12622798e883SHong Zhang its--; 1263da3a660dSBarry Smith } 12642798e883SHong Zhang while (its--) { 1265ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12672798e883SHong Zhang 1268c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 1269efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1270c14dc6b6SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 12712798e883SHong Zhang 1272c14dc6b6SHong Zhang /* local sweep */ 127364aae45aSBarry Smith ierr = 
(*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 12742798e883SHong Zhang } 1275a7420bb7SBarry Smith } else if (flag & SOR_EISENSTAT) { 1276a7420bb7SBarry Smith Vec xx1; 1277a7420bb7SBarry Smith 1278a7420bb7SBarry Smith ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1279a7420bb7SBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1280a7420bb7SBarry Smith 1281a7420bb7SBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282a7420bb7SBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283a7420bb7SBarry Smith if (!mat->diag) { 1284a7420bb7SBarry Smith ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1285a7420bb7SBarry Smith ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1286a7420bb7SBarry Smith } 1287bd0c2dcbSBarry Smith ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1288bd0c2dcbSBarry Smith if (hasop) { 1289bd0c2dcbSBarry Smith ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1290bd0c2dcbSBarry Smith } else { 1291a7420bb7SBarry Smith ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1292bd0c2dcbSBarry Smith } 1293effcda25SBarry Smith ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr); 1294a7420bb7SBarry Smith ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1295a7420bb7SBarry Smith 1296a7420bb7SBarry Smith /* local sweep */ 1297a7420bb7SBarry Smith ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1298a7420bb7SBarry Smith ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1299a7420bb7SBarry Smith ierr = VecDestroy(xx1);CHKERRQ(ierr); 13003a40ed3dSBarry Smith } else { 130129bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1302c16cb8f2SBarry Smith } 
1303c14dc6b6SHong Zhang 13046987fefcSBarry Smith if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 13053a40ed3dSBarry Smith PetscFunctionReturn(0); 13068a729477SBarry Smith } 1307a66be287SLois Curfman McInnes 13084a2ae208SSatish Balay #undef __FUNCT__ 130942e855d1Svictor #define __FUNCT__ "MatPermute_MPIAIJ" 131042e855d1Svictor PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 131142e855d1Svictor { 131242e855d1Svictor MPI_Comm comm,pcomm; 13135d0c19d7SBarry Smith PetscInt first,local_size,nrows; 13145d0c19d7SBarry Smith const PetscInt *rows; 1315dbf0e21dSBarry Smith PetscMPIInt size; 131642e855d1Svictor IS crowp,growp,irowp,lrowp,lcolp,icolp; 131742e855d1Svictor PetscErrorCode ierr; 131842e855d1Svictor 131942e855d1Svictor PetscFunctionBegin; 132042e855d1Svictor ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 132142e855d1Svictor /* make a collective version of 'rowp' */ 132242e855d1Svictor ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 132342e855d1Svictor if (pcomm==comm) { 132442e855d1Svictor crowp = rowp; 132542e855d1Svictor } else { 132642e855d1Svictor ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 132742e855d1Svictor ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 132842e855d1Svictor ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 132942e855d1Svictor ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 133042e855d1Svictor } 133142e855d1Svictor /* collect the global row permutation and invert it */ 133242e855d1Svictor ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 133342e855d1Svictor ierr = ISSetPermutation(growp);CHKERRQ(ierr); 133442e855d1Svictor if (pcomm!=comm) { 133542e855d1Svictor ierr = ISDestroy(crowp);CHKERRQ(ierr); 133642e855d1Svictor } 133742e855d1Svictor ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 133842e855d1Svictor /* get the local target indices */ 133942e855d1Svictor ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 134042e855d1Svictor ierr = 
MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 134142e855d1Svictor ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 134242e855d1Svictor ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 134342e855d1Svictor ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 134442e855d1Svictor ierr = ISDestroy(irowp);CHKERRQ(ierr); 134542e855d1Svictor /* the column permutation is so much easier; 134642e855d1Svictor make a local version of 'colp' and invert it */ 134742e855d1Svictor ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1348dbf0e21dSBarry Smith ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1349dbf0e21dSBarry Smith if (size==1) { 135042e855d1Svictor lcolp = colp; 135142e855d1Svictor } else { 135242e855d1Svictor ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 135342e855d1Svictor ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 135442e855d1Svictor ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 135542e855d1Svictor } 1356dbf0e21dSBarry Smith ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 135742e855d1Svictor ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 13584aa3045dSJed Brown ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1359dbf0e21dSBarry Smith if (size>1) { 136042e855d1Svictor ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 136142e855d1Svictor ierr = ISDestroy(lcolp);CHKERRQ(ierr); 136242e855d1Svictor } 136342e855d1Svictor /* now we just get the submatrix */ 13644aa3045dSJed Brown ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 136542e855d1Svictor /* clean up */ 136642e855d1Svictor ierr = ISDestroy(lrowp);CHKERRQ(ierr); 136742e855d1Svictor ierr = ISDestroy(icolp);CHKERRQ(ierr); 136842e855d1Svictor PetscFunctionReturn(0); 136942e855d1Svictor } 137042e855d1Svictor 137142e855d1Svictor #undef __FUNCT__ 13724a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIAIJ" 1373dfbe8321SBarry Smith PetscErrorCode 
MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  /* accumulate local statistics over both the diagonal (A) and
     off-diagonal (B) sequential blocks; info is used as scratch here
     and overwritten below with the requested (local/max/sum) values */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    /* per-process totals, no communication */
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* element-wise maximum across the matrix communicator */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM)
{ 14027adad957SLisandro Dalcin ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 14034e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14044e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14054e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14064e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14074e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1408a66be287SLois Curfman McInnes } 14094e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14104e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14114e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14124e220ebcSLois Curfman McInnes 14133a40ed3dSBarry Smith PetscFunctionReturn(0); 1414a66be287SLois Curfman McInnes } 1415a66be287SLois Curfman McInnes 14164a2ae208SSatish Balay #undef __FUNCT__ 14174a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIAIJ" 14184e0d8c25SBarry Smith PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1419c74985f6SBarry Smith { 1420c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1421dfbe8321SBarry Smith PetscErrorCode ierr; 1422c74985f6SBarry Smith 14233a40ed3dSBarry Smith PetscFunctionBegin; 142412c028f9SKris Buschelman switch (op) { 1425512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 142612c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 142728b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1428a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 142912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 143012c028f9SKris Buschelman case MAT_USE_INODES: 143112c028f9SKris Buschelman case MAT_IGNORE_ZERO_ENTRIES: 14324e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 14334e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 143412c028f9SKris Buschelman break; 143512c028f9SKris Buschelman case MAT_ROW_ORIENTED: 14364e0d8c25SBarry Smith a->roworiented = 
flg; 14374e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 14384e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 143912c028f9SKris Buschelman break; 14404e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1441290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 144212c028f9SKris Buschelman break; 144312c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 14447c922b88SBarry Smith a->donotstash = PETSC_TRUE; 144512c028f9SKris Buschelman break; 144677e54ba9SKris Buschelman case MAT_SYMMETRIC: 14474e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 144825f421beSHong Zhang break; 144977e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 1450bf108f30SBarry Smith case MAT_HERMITIAN: 1451bf108f30SBarry Smith case MAT_SYMMETRY_ETERNAL: 14524e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 145377e54ba9SKris Buschelman break; 145412c028f9SKris Buschelman default: 1455ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 14563a40ed3dSBarry Smith } 14573a40ed3dSBarry Smith PetscFunctionReturn(0); 1458c74985f6SBarry Smith } 1459c74985f6SBarry Smith 14604a2ae208SSatish Balay #undef __FUNCT__ 14614a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIAIJ" 1462b1d57f15SBarry Smith PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 146339e00950SLois Curfman McInnes { 1464154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 146587828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 14666849ba73SBarry Smith PetscErrorCode ierr; 1467d0f46423SBarry Smith PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1468d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1469b1d57f15SBarry Smith PetscInt *cmap,*idx_p; 147039e00950SLois Curfman McInnes 14713a40ed3dSBarry Smith PetscFunctionBegin; 1472abc0a331SBarry Smith 
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row;
        the buffer is cached on the matrix and reused by later calls
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both values and (after max scalars) indices */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from A/B what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column lies left of
         the diagonal block; the merged order is B[0..imark), A, B[imark..) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A-block columns are local; shift by cstart to make them global */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
1543b1d57f15SBarry Smith PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 154439e00950SLois Curfman McInnes { 15457a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 15463a40ed3dSBarry Smith 15473a40ed3dSBarry Smith PetscFunctionBegin; 1548abc0a331SBarry Smith if (!aij->getrowactive) { 1549abc0a331SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 15507a0afa10SBarry Smith } 15517a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 15523a40ed3dSBarry Smith PetscFunctionReturn(0); 155339e00950SLois Curfman McInnes } 155439e00950SLois Curfman McInnes 15554a2ae208SSatish Balay #undef __FUNCT__ 15564a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIAIJ" 1557dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1558855ac2c5SLois Curfman McInnes { 1559855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1560ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1561dfbe8321SBarry Smith PetscErrorCode ierr; 1562d0f46423SBarry Smith PetscInt i,j,cstart = mat->cmap->rstart; 1563329f5518SBarry Smith PetscReal sum = 0.0; 1564a77337e4SBarry Smith MatScalar *v; 156504ca555eSLois Curfman McInnes 15663a40ed3dSBarry Smith PetscFunctionBegin; 156717699dbbSLois Curfman McInnes if (aij->size == 1) { 156814183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 156937fa93a5SLois Curfman McInnes } else { 157004ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 157104ca555eSLois Curfman McInnes v = amat->a; 157204ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++) { 1573aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 1574329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 157504ca555eSLois Curfman McInnes #else 157604ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 157704ca555eSLois Curfman McInnes #endif 157804ca555eSLois Curfman McInnes } 
      /* add the off-diagonal (B) block's contribution, then reduce */
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column, reduce, then take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A stores local columns; cstart shifts them to global numbering */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps B's compressed columns to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so each local row sum is
         complete; only the final max needs communication */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by inserting the rows of the
   local A and B blocks as COLUMNS of the result via MatSetValues().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse ==
MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    /* transposed sizes: local/global rows and columns are swapped */
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each local row i is inserted as column 'row'
     of B (note the swapped ncol/1 arguments to MatSetValues) */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  /* undo the in-place shift of aj applied above */
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part, translating compressed columns through garray */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr), overlapping the
   scatter of rr with the left scaling and the diagonal-block work.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
17144b967eb1SSatish Balay if (rr) { 1715e1311b90SBarry Smith ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 171629bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 17174b967eb1SSatish Balay /* Overlap communication with computation. */ 1718ca9f406cSSatish Balay ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719a008b906SSatish Balay } 17204b967eb1SSatish Balay if (ll) { 1721e1311b90SBarry Smith ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 172229bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1723f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 17244b967eb1SSatish Balay } 17254b967eb1SSatish Balay /* scale the diagonal block */ 1726f830108cSBarry Smith ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 17274b967eb1SSatish Balay 17284b967eb1SSatish Balay if (rr) { 17294b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1730ca9f406cSSatish Balay ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1731f830108cSBarry Smith ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 17324b967eb1SSatish Balay } 17334b967eb1SSatish Balay 17343a40ed3dSBarry Smith PetscFunctionReturn(0); 1735a008b906SSatish Balay } 1736a008b906SSatish Balay 17374a2ae208SSatish Balay #undef __FUNCT__ 1738521d7252SBarry Smith #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1739521d7252SBarry Smith PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 17405a838052SSatish Balay { 1741521d7252SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1742521d7252SBarry Smith PetscErrorCode ierr; 1743521d7252SBarry Smith 17443a40ed3dSBarry Smith PetscFunctionBegin; 1745521d7252SBarry Smith ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1746521d7252SBarry Smith ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 17473a40ed3dSBarry Smith PetscFunctionReturn(0); 
17485a838052SSatish Balay } 17494a2ae208SSatish Balay #undef __FUNCT__ 17504a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1751dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1752bb5a7306SBarry Smith { 1753bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1754dfbe8321SBarry Smith PetscErrorCode ierr; 17553a40ed3dSBarry Smith 17563a40ed3dSBarry Smith PetscFunctionBegin; 1757bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 17583a40ed3dSBarry Smith PetscFunctionReturn(0); 1759bb5a7306SBarry Smith } 1760bb5a7306SBarry Smith 17614a2ae208SSatish Balay #undef __FUNCT__ 17624a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIAIJ" 1763dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1764d4bb536fSBarry Smith { 1765d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1766d4bb536fSBarry Smith Mat a,b,c,d; 1767d4bb536fSBarry Smith PetscTruth flg; 1768dfbe8321SBarry Smith PetscErrorCode ierr; 1769d4bb536fSBarry Smith 17703a40ed3dSBarry Smith PetscFunctionBegin; 1771d4bb536fSBarry Smith a = matA->A; b = matA->B; 1772d4bb536fSBarry Smith c = matB->A; d = matB->B; 1773d4bb536fSBarry Smith 1774d4bb536fSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1775abc0a331SBarry Smith if (flg) { 1776d4bb536fSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1777d4bb536fSBarry Smith } 17787adad957SLisandro Dalcin ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 17793a40ed3dSBarry Smith PetscFunctionReturn(0); 1780d4bb536fSBarry Smith } 1781d4bb536fSBarry Smith 17824a2ae208SSatish Balay #undef __FUNCT__ 17834a2ae208SSatish Balay #define __FUNCT__ "MatCopy_MPIAIJ" 1784dfbe8321SBarry Smith PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1785cb5b572fSBarry Smith { 1786dfbe8321SBarry Smith PetscErrorCode ierr; 1787cb5b572fSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1788cb5b572fSBarry Smith Mat_MPIAIJ 
*b = (Mat_MPIAIJ *)B->data; 1789cb5b572fSBarry Smith 1790cb5b572fSBarry Smith PetscFunctionBegin; 179133f4a19fSKris Buschelman /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 179233f4a19fSKris Buschelman if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1793cb5b572fSBarry Smith /* because of the column compression in the off-processor part of the matrix a->B, 1794cb5b572fSBarry Smith the number of columns in a->B and b->B may be different, hence we cannot call 1795cb5b572fSBarry Smith the MatCopy() directly on the two parts. If need be, we can provide a more 1796cb5b572fSBarry Smith efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1797cb5b572fSBarry Smith then copying the submatrices */ 1798cb5b572fSBarry Smith ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1799cb5b572fSBarry Smith } else { 1800cb5b572fSBarry Smith ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1801cb5b572fSBarry Smith ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1802cb5b572fSBarry Smith } 1803cb5b572fSBarry Smith PetscFunctionReturn(0); 1804cb5b572fSBarry Smith } 1805cb5b572fSBarry Smith 18064a2ae208SSatish Balay #undef __FUNCT__ 18074a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1808dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1809273d9f13SBarry Smith { 1810dfbe8321SBarry Smith PetscErrorCode ierr; 1811273d9f13SBarry Smith 1812273d9f13SBarry Smith PetscFunctionBegin; 1813273d9f13SBarry Smith ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1814273d9f13SBarry Smith PetscFunctionReturn(0); 1815273d9f13SBarry Smith } 1816273d9f13SBarry Smith 1817ac90fabeSBarry Smith #include "petscblaslapack.h" 1818ac90fabeSBarry Smith #undef __FUNCT__ 1819ac90fabeSBarry Smith #define __FUNCT__ "MatAXPY_MPIAIJ" 1820f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 
1821ac90fabeSBarry Smith { 1822dfbe8321SBarry Smith PetscErrorCode ierr; 1823b1d57f15SBarry Smith PetscInt i; 1824ac90fabeSBarry Smith Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 18254ce68768SBarry Smith PetscBLASInt bnz,one=1; 1826ac90fabeSBarry Smith Mat_SeqAIJ *x,*y; 1827ac90fabeSBarry Smith 1828ac90fabeSBarry Smith PetscFunctionBegin; 1829ac90fabeSBarry Smith if (str == SAME_NONZERO_PATTERN) { 1830f4df32b1SMatthew Knepley PetscScalar alpha = a; 1831ac90fabeSBarry Smith x = (Mat_SeqAIJ *)xx->A->data; 1832ac90fabeSBarry Smith y = (Mat_SeqAIJ *)yy->A->data; 18330805154bSBarry Smith bnz = PetscBLASIntCast(x->nz); 1834f4df32b1SMatthew Knepley BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1835ac90fabeSBarry Smith x = (Mat_SeqAIJ *)xx->B->data; 1836ac90fabeSBarry Smith y = (Mat_SeqAIJ *)yy->B->data; 18370805154bSBarry Smith bnz = PetscBLASIntCast(x->nz); 1838f4df32b1SMatthew Knepley BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1839a30b2313SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { 1840f4df32b1SMatthew Knepley ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1841c537a176SHong Zhang 1842c537a176SHong Zhang x = (Mat_SeqAIJ *)xx->B->data; 1843a30b2313SHong Zhang y = (Mat_SeqAIJ *)yy->B->data; 1844a30b2313SHong Zhang if (y->xtoy && y->XtoY != xx->B) { 1845a30b2313SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1846a30b2313SHong Zhang ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1847c537a176SHong Zhang } 1848a30b2313SHong Zhang if (!y->xtoy) { /* get xtoy */ 1849d0f46423SBarry Smith ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1850a30b2313SHong Zhang y->XtoY = xx->B; 1851407f6b05SHong Zhang ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1852c537a176SHong Zhang } 1853f4df32b1SMatthew Knepley for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1854ac90fabeSBarry Smith } else { 1855f4df32b1SMatthew Knepley ierr = 
MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1856ac90fabeSBarry Smith } 1857ac90fabeSBarry Smith PetscFunctionReturn(0); 1858ac90fabeSBarry Smith } 1859ac90fabeSBarry Smith 1860354c94deSBarry Smith EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1861354c94deSBarry Smith 1862354c94deSBarry Smith #undef __FUNCT__ 1863354c94deSBarry Smith #define __FUNCT__ "MatConjugate_MPIAIJ" 1864354c94deSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1865354c94deSBarry Smith { 1866354c94deSBarry Smith #if defined(PETSC_USE_COMPLEX) 1867354c94deSBarry Smith PetscErrorCode ierr; 1868354c94deSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1869354c94deSBarry Smith 1870354c94deSBarry Smith PetscFunctionBegin; 1871354c94deSBarry Smith ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1872354c94deSBarry Smith ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1873354c94deSBarry Smith #else 1874354c94deSBarry Smith PetscFunctionBegin; 1875354c94deSBarry Smith #endif 1876354c94deSBarry Smith PetscFunctionReturn(0); 1877354c94deSBarry Smith } 1878354c94deSBarry Smith 187999cafbc1SBarry Smith #undef __FUNCT__ 188099cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIAIJ" 188199cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIAIJ(Mat A) 188299cafbc1SBarry Smith { 188399cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 188499cafbc1SBarry Smith PetscErrorCode ierr; 188599cafbc1SBarry Smith 188699cafbc1SBarry Smith PetscFunctionBegin; 188799cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 188899cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 188999cafbc1SBarry Smith PetscFunctionReturn(0); 189099cafbc1SBarry Smith } 189199cafbc1SBarry Smith 189299cafbc1SBarry Smith #undef __FUNCT__ 189399cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 189499cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 189599cafbc1SBarry Smith { 189699cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 
189799cafbc1SBarry Smith PetscErrorCode ierr; 189899cafbc1SBarry Smith 189999cafbc1SBarry Smith PetscFunctionBegin; 190099cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 190199cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 190299cafbc1SBarry Smith PetscFunctionReturn(0); 190399cafbc1SBarry Smith } 190499cafbc1SBarry Smith 1905103bf8bdSMatthew Knepley #ifdef PETSC_HAVE_PBGL 1906103bf8bdSMatthew Knepley 1907103bf8bdSMatthew Knepley #include <boost/parallel/mpi/bsp_process_group.hpp> 1908a2c909beSMatthew Knepley #include <boost/graph/distributed/ilu_default_graph.hpp> 1909a2c909beSMatthew Knepley #include <boost/graph/distributed/ilu_0_block.hpp> 1910a2c909beSMatthew Knepley #include <boost/graph/distributed/ilu_preconditioner.hpp> 1911103bf8bdSMatthew Knepley #include <boost/graph/distributed/petsc/interface.hpp> 1912a2c909beSMatthew Knepley #include <boost/multi_array.hpp> 1913d0f46423SBarry Smith #include <boost/parallel/distributed_property_map->hpp> 1914103bf8bdSMatthew Knepley 1915103bf8bdSMatthew Knepley #undef __FUNCT__ 1916103bf8bdSMatthew Knepley #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1917103bf8bdSMatthew Knepley /* 1918103bf8bdSMatthew Knepley This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1919103bf8bdSMatthew Knepley */ 19200481f469SBarry Smith PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1921103bf8bdSMatthew Knepley { 1922a2c909beSMatthew Knepley namespace petsc = boost::distributed::petsc; 1923a2c909beSMatthew Knepley 1924a2c909beSMatthew Knepley namespace graph_dist = boost::graph::distributed; 1925a2c909beSMatthew Knepley using boost::graph::distributed::ilu_default::process_group_type; 1926a2c909beSMatthew Knepley using boost::graph::ilu_permuted; 1927a2c909beSMatthew Knepley 1928103bf8bdSMatthew Knepley PetscTruth row_identity, col_identity; 1929776b82aeSLisandro Dalcin PetscContainer c; 1930103bf8bdSMatthew 
Knepley PetscInt m, n, M, N; 1931103bf8bdSMatthew Knepley PetscErrorCode ierr; 1932103bf8bdSMatthew Knepley 1933103bf8bdSMatthew Knepley PetscFunctionBegin; 1934103bf8bdSMatthew Knepley if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1935103bf8bdSMatthew Knepley ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1936103bf8bdSMatthew Knepley ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1937103bf8bdSMatthew Knepley if (!row_identity || !col_identity) { 1938103bf8bdSMatthew Knepley SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1939103bf8bdSMatthew Knepley } 1940103bf8bdSMatthew Knepley 1941103bf8bdSMatthew Knepley process_group_type pg; 1942a2c909beSMatthew Knepley typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1943a2c909beSMatthew Knepley lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1944a2c909beSMatthew Knepley lgraph_type& level_graph = *lgraph_p; 1945a2c909beSMatthew Knepley graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1946a2c909beSMatthew Knepley 1947103bf8bdSMatthew Knepley petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1948a2c909beSMatthew Knepley ilu_permuted(level_graph); 1949103bf8bdSMatthew Knepley 1950103bf8bdSMatthew Knepley /* put together the new matrix */ 19517adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1952103bf8bdSMatthew Knepley ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1953103bf8bdSMatthew Knepley ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1954719d5645SBarry Smith ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1955719d5645SBarry Smith ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1956719d5645SBarry Smith ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957719d5645SBarry Smith ierr = MatAssemblyEnd(fact, 
MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958103bf8bdSMatthew Knepley 19597adad957SLisandro Dalcin ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1960776b82aeSLisandro Dalcin ierr = PetscContainerSetPointer(c, lgraph_p); 1961719d5645SBarry Smith ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1962103bf8bdSMatthew Knepley PetscFunctionReturn(0); 1963103bf8bdSMatthew Knepley } 1964103bf8bdSMatthew Knepley 1965103bf8bdSMatthew Knepley #undef __FUNCT__ 1966103bf8bdSMatthew Knepley #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 19670481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1968103bf8bdSMatthew Knepley { 1969103bf8bdSMatthew Knepley PetscFunctionBegin; 1970103bf8bdSMatthew Knepley PetscFunctionReturn(0); 1971103bf8bdSMatthew Knepley } 1972103bf8bdSMatthew Knepley 1973103bf8bdSMatthew Knepley #undef __FUNCT__ 1974103bf8bdSMatthew Knepley #define __FUNCT__ "MatSolve_MPIAIJ" 1975103bf8bdSMatthew Knepley /* 1976103bf8bdSMatthew Knepley This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1977103bf8bdSMatthew Knepley */ 1978103bf8bdSMatthew Knepley PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1979103bf8bdSMatthew Knepley { 1980a2c909beSMatthew Knepley namespace graph_dist = boost::graph::distributed; 1981a2c909beSMatthew Knepley 1982a2c909beSMatthew Knepley typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1983a2c909beSMatthew Knepley lgraph_type* lgraph_p; 1984776b82aeSLisandro Dalcin PetscContainer c; 1985103bf8bdSMatthew Knepley PetscErrorCode ierr; 1986103bf8bdSMatthew Knepley 1987103bf8bdSMatthew Knepley PetscFunctionBegin; 1988103bf8bdSMatthew Knepley ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1989776b82aeSLisandro Dalcin ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1990103bf8bdSMatthew Knepley ierr = VecCopy(b, x);CHKERRQ(ierr); 1991a2c909beSMatthew Knepley 
1992a2c909beSMatthew Knepley PetscScalar* array_x; 1993a2c909beSMatthew Knepley ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1994a2c909beSMatthew Knepley PetscInt sx; 1995a2c909beSMatthew Knepley ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1996a2c909beSMatthew Knepley 1997a2c909beSMatthew Knepley PetscScalar* array_b; 1998a2c909beSMatthew Knepley ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1999a2c909beSMatthew Knepley PetscInt sb; 2000a2c909beSMatthew Knepley ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2001a2c909beSMatthew Knepley 2002a2c909beSMatthew Knepley lgraph_type& level_graph = *lgraph_p; 2003a2c909beSMatthew Knepley graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2004a2c909beSMatthew Knepley 2005a2c909beSMatthew Knepley typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2006a2c909beSMatthew Knepley array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2007a2c909beSMatthew Knepley ref_x(array_x, boost::extents[num_vertices(graph)]); 2008a2c909beSMatthew Knepley 2009a2c909beSMatthew Knepley typedef boost::iterator_property_map<array_ref_type::iterator, 2010a2c909beSMatthew Knepley boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2011a2c909beSMatthew Knepley gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2012a2c909beSMatthew Knepley vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2013a2c909beSMatthew Knepley 2014a2c909beSMatthew Knepley ilu_set_solve(*lgraph_p, vector_b, vector_x); 2015a2c909beSMatthew Knepley 2016103bf8bdSMatthew Knepley PetscFunctionReturn(0); 2017103bf8bdSMatthew Knepley } 2018103bf8bdSMatthew Knepley #endif 2019103bf8bdSMatthew Knepley 202069db28dcSHong Zhang typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 202169db28dcSHong Zhang PetscInt nzlocal,nsends,nrecvs; 2022aa5bb8c0SSatish Balay PetscMPIInt *send_rank; 2023aa5bb8c0SSatish Balay PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 
202469db28dcSHong Zhang PetscScalar *sbuf_a,**rbuf_a; 202569db28dcSHong Zhang PetscErrorCode (*MatDestroy)(Mat); 202669db28dcSHong Zhang } Mat_Redundant; 202769db28dcSHong Zhang 202869db28dcSHong Zhang #undef __FUNCT__ 202969db28dcSHong Zhang #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 203069db28dcSHong Zhang PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 203169db28dcSHong Zhang { 203269db28dcSHong Zhang PetscErrorCode ierr; 203369db28dcSHong Zhang Mat_Redundant *redund=(Mat_Redundant*)ptr; 203469db28dcSHong Zhang PetscInt i; 203569db28dcSHong Zhang 203669db28dcSHong Zhang PetscFunctionBegin; 203769db28dcSHong Zhang ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 203869db28dcSHong Zhang ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 203969db28dcSHong Zhang ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 204069db28dcSHong Zhang for (i=0; i<redund->nrecvs; i++){ 204169db28dcSHong Zhang ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 204269db28dcSHong Zhang ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 204369db28dcSHong Zhang } 204469db28dcSHong Zhang ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 204569db28dcSHong Zhang ierr = PetscFree(redund);CHKERRQ(ierr); 204669db28dcSHong Zhang PetscFunctionReturn(0); 204769db28dcSHong Zhang } 204869db28dcSHong Zhang 204969db28dcSHong Zhang #undef __FUNCT__ 205069db28dcSHong Zhang #define __FUNCT__ "MatDestroy_MatRedundant" 205169db28dcSHong Zhang PetscErrorCode MatDestroy_MatRedundant(Mat A) 205269db28dcSHong Zhang { 205369db28dcSHong Zhang PetscErrorCode ierr; 205469db28dcSHong Zhang PetscContainer container; 205569db28dcSHong Zhang Mat_Redundant *redund=PETSC_NULL; 205669db28dcSHong Zhang 205769db28dcSHong Zhang PetscFunctionBegin; 205869db28dcSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 205969db28dcSHong Zhang if (container) { 206069db28dcSHong Zhang ierr = 
PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 206169db28dcSHong Zhang } else { 206269db28dcSHong Zhang SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 206369db28dcSHong Zhang } 206469db28dcSHong Zhang A->ops->destroy = redund->MatDestroy; 206569db28dcSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 206669db28dcSHong Zhang ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 206769db28dcSHong Zhang ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 206869db28dcSHong Zhang PetscFunctionReturn(0); 206969db28dcSHong Zhang } 207069db28dcSHong Zhang 207169db28dcSHong Zhang #undef __FUNCT__ 207269db28dcSHong Zhang #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 207369db28dcSHong Zhang PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 207469db28dcSHong Zhang { 207569db28dcSHong Zhang PetscMPIInt rank,size; 20767adad957SLisandro Dalcin MPI_Comm comm=((PetscObject)mat)->comm; 207769db28dcSHong Zhang PetscErrorCode ierr; 207869db28dcSHong Zhang PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 207969db28dcSHong Zhang PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2080d0f46423SBarry Smith PetscInt *rowrange=mat->rmap->range; 208169db28dcSHong Zhang Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 208269db28dcSHong Zhang Mat A=aij->A,B=aij->B,C=*matredundant; 208369db28dcSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 208469db28dcSHong Zhang PetscScalar *sbuf_a; 208569db28dcSHong Zhang PetscInt nzlocal=a->nz+b->nz; 2086d0f46423SBarry Smith PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2087d0f46423SBarry Smith PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 208869db28dcSHong Zhang PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2089a77337e4SBarry Smith MatScalar *aworkA,*aworkB; 2090a77337e4SBarry Smith PetscScalar *vals; 209169db28dcSHong 
Zhang PetscMPIInt tag1,tag2,tag3,imdex; 209269db28dcSHong Zhang MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 209369db28dcSHong Zhang *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 209469db28dcSHong Zhang MPI_Status recv_status,*send_status; 209569db28dcSHong Zhang PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 209669db28dcSHong Zhang PetscInt **rbuf_j=PETSC_NULL; 209769db28dcSHong Zhang PetscScalar **rbuf_a=PETSC_NULL; 209869db28dcSHong Zhang Mat_Redundant *redund=PETSC_NULL; 209969db28dcSHong Zhang PetscContainer container; 210069db28dcSHong Zhang 210169db28dcSHong Zhang PetscFunctionBegin; 210269db28dcSHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 210369db28dcSHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 210469db28dcSHong Zhang 210569db28dcSHong Zhang if (reuse == MAT_REUSE_MATRIX) { 210669db28dcSHong Zhang ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2107d0f46423SBarry Smith if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 210869db28dcSHong Zhang ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 210969db28dcSHong Zhang if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 211069db28dcSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 211169db28dcSHong Zhang if (container) { 211269db28dcSHong Zhang ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 211369db28dcSHong Zhang } else { 211469db28dcSHong Zhang SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 211569db28dcSHong Zhang } 211669db28dcSHong Zhang if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 211769db28dcSHong Zhang 211869db28dcSHong Zhang nsends = redund->nsends; 211969db28dcSHong Zhang nrecvs = redund->nrecvs; 212069db28dcSHong Zhang send_rank = redund->send_rank; recv_rank = send_rank + size; 212169db28dcSHong Zhang sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 212269db28dcSHong Zhang sbuf_j = redund->sbuf_j; 212369db28dcSHong Zhang sbuf_a = redund->sbuf_a; 212469db28dcSHong Zhang rbuf_j = redund->rbuf_j; 212569db28dcSHong Zhang rbuf_a = redund->rbuf_a; 212669db28dcSHong Zhang } 212769db28dcSHong Zhang 212869db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 212969db28dcSHong Zhang PetscMPIInt subrank,subsize; 213069db28dcSHong Zhang PetscInt nleftover,np_subcomm; 213169db28dcSHong Zhang /* get the destination processors' id send_rank, nsends and nrecvs */ 213269db28dcSHong Zhang ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 213369db28dcSHong Zhang ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 213469db28dcSHong Zhang ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 213569db28dcSHong Zhang recv_rank = send_rank + size; 213669db28dcSHong Zhang np_subcomm = size/nsubcomm; 213769db28dcSHong Zhang nleftover = size - nsubcomm*np_subcomm; 213869db28dcSHong Zhang nsends = 0; nrecvs = 0; 213969db28dcSHong Zhang for (i=0; i<size; i++){ /* i=rank*/ 214069db28dcSHong Zhang if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 214169db28dcSHong Zhang send_rank[nsends] = i; nsends++; 214269db28dcSHong Zhang recv_rank[nrecvs++] = i; 214369db28dcSHong Zhang } 214469db28dcSHong Zhang } 214569db28dcSHong Zhang if (rank >= size - nleftover){/* this proc is a leftover processor */ 214669db28dcSHong Zhang i = size-nleftover-1; 214769db28dcSHong Zhang j = 0; 214869db28dcSHong Zhang while (j < nsubcomm - nleftover){ 214969db28dcSHong Zhang send_rank[nsends++] = i; 215069db28dcSHong Zhang i--; j++; 215169db28dcSHong Zhang } 215269db28dcSHong Zhang } 215369db28dcSHong Zhang 215469db28dcSHong 
Zhang if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 215569db28dcSHong Zhang for (i=0; i<nleftover; i++){ 215669db28dcSHong Zhang recv_rank[nrecvs++] = size-nleftover+i; 215769db28dcSHong Zhang } 215869db28dcSHong Zhang } 215969db28dcSHong Zhang 216069db28dcSHong Zhang /* allocate sbuf_j, sbuf_a */ 216169db28dcSHong Zhang i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 216269db28dcSHong Zhang ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 216369db28dcSHong Zhang ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 216469db28dcSHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 216569db28dcSHong Zhang 216669db28dcSHong Zhang /* copy mat's local entries into the buffers */ 216769db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 216869db28dcSHong Zhang rownz_max = 0; 216969db28dcSHong Zhang rptr = sbuf_j; 217069db28dcSHong Zhang cols = sbuf_j + rend-rstart + 1; 217169db28dcSHong Zhang vals = sbuf_a; 217269db28dcSHong Zhang rptr[0] = 0; 217369db28dcSHong Zhang for (i=0; i<rend-rstart; i++){ 217469db28dcSHong Zhang row = i + rstart; 217569db28dcSHong Zhang nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 217669db28dcSHong Zhang ncols = nzA + nzB; 217769db28dcSHong Zhang cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 217869db28dcSHong Zhang aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 217969db28dcSHong Zhang /* load the column indices for this row into cols */ 218069db28dcSHong Zhang lwrite = 0; 218169db28dcSHong Zhang for (l=0; l<nzB; l++) { 218269db28dcSHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart){ 218369db28dcSHong Zhang vals[lwrite] = aworkB[l]; 218469db28dcSHong Zhang cols[lwrite++] = ctmp; 218569db28dcSHong Zhang } 218669db28dcSHong Zhang } 218769db28dcSHong Zhang for (l=0; l<nzA; l++){ 218869db28dcSHong Zhang vals[lwrite] = aworkA[l]; 218969db28dcSHong Zhang cols[lwrite++] = cstart + cworkA[l]; 219069db28dcSHong Zhang } 
219169db28dcSHong Zhang for (l=0; l<nzB; l++) { 219269db28dcSHong Zhang if ((ctmp = bmap[cworkB[l]]) >= cend){ 219369db28dcSHong Zhang vals[lwrite] = aworkB[l]; 219469db28dcSHong Zhang cols[lwrite++] = ctmp; 219569db28dcSHong Zhang } 219669db28dcSHong Zhang } 219769db28dcSHong Zhang vals += ncols; 219869db28dcSHong Zhang cols += ncols; 219969db28dcSHong Zhang rptr[i+1] = rptr[i] + ncols; 220069db28dcSHong Zhang if (rownz_max < ncols) rownz_max = ncols; 220169db28dcSHong Zhang } 220269db28dcSHong Zhang if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 220369db28dcSHong Zhang } else { /* only copy matrix values into sbuf_a */ 220469db28dcSHong Zhang rptr = sbuf_j; 220569db28dcSHong Zhang vals = sbuf_a; 220669db28dcSHong Zhang rptr[0] = 0; 220769db28dcSHong Zhang for (i=0; i<rend-rstart; i++){ 220869db28dcSHong Zhang row = i + rstart; 220969db28dcSHong Zhang nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 221069db28dcSHong Zhang ncols = nzA + nzB; 221169db28dcSHong Zhang cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 221269db28dcSHong Zhang aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 221369db28dcSHong Zhang lwrite = 0; 221469db28dcSHong Zhang for (l=0; l<nzB; l++) { 221569db28dcSHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 221669db28dcSHong Zhang } 221769db28dcSHong Zhang for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 221869db28dcSHong Zhang for (l=0; l<nzB; l++) { 221969db28dcSHong Zhang if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 222069db28dcSHong Zhang } 222169db28dcSHong Zhang vals += ncols; 222269db28dcSHong Zhang rptr[i+1] = rptr[i] + ncols; 222369db28dcSHong Zhang } 222469db28dcSHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 222569db28dcSHong Zhang 222669db28dcSHong Zhang /* send nzlocal to others, and recv other's nzlocal */ 222769db28dcSHong Zhang /*--------------------------------------------------*/ 
222869db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 222969db28dcSHong Zhang ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 223069db28dcSHong Zhang s_waits2 = s_waits3 + nsends; 223169db28dcSHong Zhang s_waits1 = s_waits2 + nsends; 223269db28dcSHong Zhang r_waits1 = s_waits1 + nsends; 223369db28dcSHong Zhang r_waits2 = r_waits1 + nrecvs; 223469db28dcSHong Zhang r_waits3 = r_waits2 + nrecvs; 223569db28dcSHong Zhang } else { 223669db28dcSHong Zhang ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 223769db28dcSHong Zhang r_waits3 = s_waits3 + nsends; 223869db28dcSHong Zhang } 223969db28dcSHong Zhang 224069db28dcSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 224169db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 224269db28dcSHong Zhang /* get new tags to keep the communication clean */ 224369db28dcSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 224469db28dcSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 224569db28dcSHong Zhang ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 224669db28dcSHong Zhang rbuf_nz = sbuf_nz + nsends; 224769db28dcSHong Zhang 224869db28dcSHong Zhang /* post receives of other's nzlocal */ 224969db28dcSHong Zhang for (i=0; i<nrecvs; i++){ 225069db28dcSHong Zhang ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 225169db28dcSHong Zhang } 225269db28dcSHong Zhang /* send nzlocal to others */ 225369db28dcSHong Zhang for (i=0; i<nsends; i++){ 225469db28dcSHong Zhang sbuf_nz[i] = nzlocal; 225569db28dcSHong Zhang ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 225669db28dcSHong Zhang } 225769db28dcSHong Zhang /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 
225869db28dcSHong Zhang count = nrecvs; 225969db28dcSHong Zhang while (count) { 226069db28dcSHong Zhang ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 226169db28dcSHong Zhang recv_rank[imdex] = recv_status.MPI_SOURCE; 226269db28dcSHong Zhang /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 226369db28dcSHong Zhang ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 226469db28dcSHong Zhang 226569db28dcSHong Zhang i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 226669db28dcSHong Zhang rbuf_nz[imdex] += i + 2; 226769db28dcSHong Zhang ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 226869db28dcSHong Zhang ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 226969db28dcSHong Zhang count--; 227069db28dcSHong Zhang } 227169db28dcSHong Zhang /* wait on sends of nzlocal */ 227269db28dcSHong Zhang if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 227369db28dcSHong Zhang /* send mat->i,j to others, and recv from other's */ 227469db28dcSHong Zhang /*------------------------------------------------*/ 227569db28dcSHong Zhang for (i=0; i<nsends; i++){ 227669db28dcSHong Zhang j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 227769db28dcSHong Zhang ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 227869db28dcSHong Zhang } 227969db28dcSHong Zhang /* wait on receives of mat->i,j */ 228069db28dcSHong Zhang /*------------------------------*/ 228169db28dcSHong Zhang count = nrecvs; 228269db28dcSHong Zhang while (count) { 228369db28dcSHong Zhang ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 228469db28dcSHong Zhang if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 228569db28dcSHong 
Zhang count--; 228669db28dcSHong Zhang } 228769db28dcSHong Zhang /* wait on sends of mat->i,j */ 228869db28dcSHong Zhang /*---------------------------*/ 228969db28dcSHong Zhang if (nsends) { 229069db28dcSHong Zhang ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 229169db28dcSHong Zhang } 229269db28dcSHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 229369db28dcSHong Zhang 229469db28dcSHong Zhang /* post receives, send and receive mat->a */ 229569db28dcSHong Zhang /*----------------------------------------*/ 229669db28dcSHong Zhang for (imdex=0; imdex<nrecvs; imdex++) { 229769db28dcSHong Zhang ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 229869db28dcSHong Zhang } 229969db28dcSHong Zhang for (i=0; i<nsends; i++){ 230069db28dcSHong Zhang ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 230169db28dcSHong Zhang } 230269db28dcSHong Zhang count = nrecvs; 230369db28dcSHong Zhang while (count) { 230469db28dcSHong Zhang ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 230569db28dcSHong Zhang if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 230669db28dcSHong Zhang count--; 230769db28dcSHong Zhang } 230869db28dcSHong Zhang if (nsends) { 230969db28dcSHong Zhang ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 231069db28dcSHong Zhang } 231169db28dcSHong Zhang 231269db28dcSHong Zhang ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 231369db28dcSHong Zhang 231469db28dcSHong Zhang /* create redundant matrix */ 231569db28dcSHong Zhang /*-------------------------*/ 231669db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 231769db28dcSHong Zhang /* compute rownz_max for preallocation */ 231869db28dcSHong Zhang for (imdex=0; imdex<nrecvs; imdex++){ 231969db28dcSHong Zhang j = rowrange[recv_rank[imdex]+1] - 
rowrange[recv_rank[imdex]]; 232069db28dcSHong Zhang rptr = rbuf_j[imdex]; 232169db28dcSHong Zhang for (i=0; i<j; i++){ 232269db28dcSHong Zhang ncols = rptr[i+1] - rptr[i]; 232369db28dcSHong Zhang if (rownz_max < ncols) rownz_max = ncols; 232469db28dcSHong Zhang } 232569db28dcSHong Zhang } 232669db28dcSHong Zhang 232769db28dcSHong Zhang ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 232869db28dcSHong Zhang ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 232969db28dcSHong Zhang ierr = MatSetFromOptions(C);CHKERRQ(ierr); 233069db28dcSHong Zhang ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 233169db28dcSHong Zhang ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 233269db28dcSHong Zhang } else { 233369db28dcSHong Zhang C = *matredundant; 233469db28dcSHong Zhang } 233569db28dcSHong Zhang 233669db28dcSHong Zhang /* insert local matrix entries */ 233769db28dcSHong Zhang rptr = sbuf_j; 233869db28dcSHong Zhang cols = sbuf_j + rend-rstart + 1; 233969db28dcSHong Zhang vals = sbuf_a; 234069db28dcSHong Zhang for (i=0; i<rend-rstart; i++){ 234169db28dcSHong Zhang row = i + rstart; 234269db28dcSHong Zhang ncols = rptr[i+1] - rptr[i]; 234369db28dcSHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234469db28dcSHong Zhang vals += ncols; 234569db28dcSHong Zhang cols += ncols; 234669db28dcSHong Zhang } 234769db28dcSHong Zhang /* insert received matrix entries */ 234869db28dcSHong Zhang for (imdex=0; imdex<nrecvs; imdex++){ 234969db28dcSHong Zhang rstart = rowrange[recv_rank[imdex]]; 235069db28dcSHong Zhang rend = rowrange[recv_rank[imdex]+1]; 235169db28dcSHong Zhang rptr = rbuf_j[imdex]; 235269db28dcSHong Zhang cols = rbuf_j[imdex] + rend-rstart + 1; 235369db28dcSHong Zhang vals = rbuf_a[imdex]; 235469db28dcSHong Zhang for (i=0; i<rend-rstart; i++){ 235569db28dcSHong Zhang row = i + rstart; 235669db28dcSHong Zhang ncols = rptr[i+1] - rptr[i]; 
235769db28dcSHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 235869db28dcSHong Zhang vals += ncols; 235969db28dcSHong Zhang cols += ncols; 236069db28dcSHong Zhang } 236169db28dcSHong Zhang } 236269db28dcSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 236369db28dcSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 236469db28dcSHong Zhang ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2365d0f46423SBarry Smith if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 236669db28dcSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 236769db28dcSHong Zhang PetscContainer container; 236869db28dcSHong Zhang *matredundant = C; 236969db28dcSHong Zhang /* create a supporting struct and attach it to C for reuse */ 237038f2d2fdSLisandro Dalcin ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 237169db28dcSHong Zhang ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 237269db28dcSHong Zhang ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 237369db28dcSHong Zhang ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 237469db28dcSHong Zhang ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 237569db28dcSHong Zhang 237669db28dcSHong Zhang redund->nzlocal = nzlocal; 237769db28dcSHong Zhang redund->nsends = nsends; 237869db28dcSHong Zhang redund->nrecvs = nrecvs; 237969db28dcSHong Zhang redund->send_rank = send_rank; 238069db28dcSHong Zhang redund->sbuf_nz = sbuf_nz; 238169db28dcSHong Zhang redund->sbuf_j = sbuf_j; 238269db28dcSHong Zhang redund->sbuf_a = sbuf_a; 238369db28dcSHong Zhang redund->rbuf_j = rbuf_j; 238469db28dcSHong Zhang redund->rbuf_a = rbuf_a; 238569db28dcSHong Zhang 238669db28dcSHong Zhang redund->MatDestroy = C->ops->destroy; 238769db28dcSHong Zhang C->ops->destroy = 
MatDestroy_MatRedundant; 238869db28dcSHong Zhang } 238969db28dcSHong Zhang PetscFunctionReturn(0); 239069db28dcSHong Zhang } 239169db28dcSHong Zhang 239203bc72f1SMatthew Knepley #undef __FUNCT__ 2393c91732d9SHong Zhang #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2394c91732d9SHong Zhang PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2395c91732d9SHong Zhang { 2396c91732d9SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2397c91732d9SHong Zhang PetscErrorCode ierr; 2398c91732d9SHong Zhang PetscInt i,*idxb = 0; 2399c91732d9SHong Zhang PetscScalar *va,*vb; 2400c91732d9SHong Zhang Vec vtmp; 2401c91732d9SHong Zhang 2402c91732d9SHong Zhang PetscFunctionBegin; 2403c91732d9SHong Zhang ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2404c91732d9SHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2405c91732d9SHong Zhang if (idx) { 2406192daf7cSBarry Smith for (i=0; i<A->rmap->n; i++) { 2407d0f46423SBarry Smith if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2408c91732d9SHong Zhang } 2409c91732d9SHong Zhang } 2410c91732d9SHong Zhang 2411d0f46423SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2412c91732d9SHong Zhang if (idx) { 2413d0f46423SBarry Smith ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2414c91732d9SHong Zhang } 2415c91732d9SHong Zhang ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2416c91732d9SHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2417c91732d9SHong Zhang 2418d0f46423SBarry Smith for (i=0; i<A->rmap->n; i++){ 2419c91732d9SHong Zhang if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2420c91732d9SHong Zhang va[i] = vb[i]; 2421c91732d9SHong Zhang if (idx) idx[i] = a->garray[idxb[i]]; 2422c91732d9SHong Zhang } 2423c91732d9SHong Zhang } 2424c91732d9SHong Zhang 2425c91732d9SHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2426c91732d9SHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2427c91732d9SHong Zhang if (idxb) { 2428c91732d9SHong Zhang ierr 
= PetscFree(idxb);CHKERRQ(ierr); 2429c91732d9SHong Zhang } 2430c91732d9SHong Zhang ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2431c91732d9SHong Zhang PetscFunctionReturn(0); 2432c91732d9SHong Zhang } 2433c91732d9SHong Zhang 2434c91732d9SHong Zhang #undef __FUNCT__ 2435c87e5d42SMatthew Knepley #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2436c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2437c87e5d42SMatthew Knepley { 2438c87e5d42SMatthew Knepley Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2439c87e5d42SMatthew Knepley PetscErrorCode ierr; 2440c87e5d42SMatthew Knepley PetscInt i,*idxb = 0; 2441c87e5d42SMatthew Knepley PetscScalar *va,*vb; 2442c87e5d42SMatthew Knepley Vec vtmp; 2443c87e5d42SMatthew Knepley 2444c87e5d42SMatthew Knepley PetscFunctionBegin; 2445c87e5d42SMatthew Knepley ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2446c87e5d42SMatthew Knepley ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2447c87e5d42SMatthew Knepley if (idx) { 2448c87e5d42SMatthew Knepley for (i=0; i<A->cmap->n; i++) { 2449c87e5d42SMatthew Knepley if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2450c87e5d42SMatthew Knepley } 2451c87e5d42SMatthew Knepley } 2452c87e5d42SMatthew Knepley 2453c87e5d42SMatthew Knepley ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2454c87e5d42SMatthew Knepley if (idx) { 2455c87e5d42SMatthew Knepley ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2456c87e5d42SMatthew Knepley } 2457c87e5d42SMatthew Knepley ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2458c87e5d42SMatthew Knepley ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2459c87e5d42SMatthew Knepley 2460c87e5d42SMatthew Knepley for (i=0; i<A->rmap->n; i++){ 2461c87e5d42SMatthew Knepley if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2462c87e5d42SMatthew Knepley va[i] = vb[i]; 2463c87e5d42SMatthew Knepley if (idx) idx[i] = a->garray[idxb[i]]; 2464c87e5d42SMatthew Knepley } 2465c87e5d42SMatthew Knepley } 
2466c87e5d42SMatthew Knepley 2467c87e5d42SMatthew Knepley ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2468c87e5d42SMatthew Knepley ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2469c87e5d42SMatthew Knepley if (idxb) { 2470c87e5d42SMatthew Knepley ierr = PetscFree(idxb);CHKERRQ(ierr); 2471c87e5d42SMatthew Knepley } 2472c87e5d42SMatthew Knepley ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2473c87e5d42SMatthew Knepley PetscFunctionReturn(0); 2474c87e5d42SMatthew Knepley } 2475c87e5d42SMatthew Knepley 2476c87e5d42SMatthew Knepley #undef __FUNCT__ 247703bc72f1SMatthew Knepley #define __FUNCT__ "MatGetRowMin_MPIAIJ" 247803bc72f1SMatthew Knepley PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 247903bc72f1SMatthew Knepley { 248003bc72f1SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2481d0f46423SBarry Smith PetscInt n = A->rmap->n; 2482d0f46423SBarry Smith PetscInt cstart = A->cmap->rstart; 248303bc72f1SMatthew Knepley PetscInt *cmap = mat->garray; 248403bc72f1SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 248503bc72f1SMatthew Knepley Vec diagV, offdiagV; 248603bc72f1SMatthew Knepley PetscScalar *a, *diagA, *offdiagA; 248703bc72f1SMatthew Knepley PetscInt r; 248803bc72f1SMatthew Knepley PetscErrorCode ierr; 248903bc72f1SMatthew Knepley 249003bc72f1SMatthew Knepley PetscFunctionBegin; 249103bc72f1SMatthew Knepley ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2492e64afeacSLisandro Dalcin ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2493e64afeacSLisandro Dalcin ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 249403bc72f1SMatthew Knepley ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 249503bc72f1SMatthew Knepley ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 249603bc72f1SMatthew Knepley ierr = VecGetArray(v, &a);CHKERRQ(ierr); 249703bc72f1SMatthew Knepley ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 249803bc72f1SMatthew Knepley ierr = 
VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 249903bc72f1SMatthew Knepley for(r = 0; r < n; ++r) { 2500028cd4eaSSatish Balay if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 250103bc72f1SMatthew Knepley a[r] = diagA[r]; 250203bc72f1SMatthew Knepley idx[r] = cstart + diagIdx[r]; 250303bc72f1SMatthew Knepley } else { 250403bc72f1SMatthew Knepley a[r] = offdiagA[r]; 250503bc72f1SMatthew Knepley idx[r] = cmap[offdiagIdx[r]]; 250603bc72f1SMatthew Knepley } 250703bc72f1SMatthew Knepley } 250803bc72f1SMatthew Knepley ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 250903bc72f1SMatthew Knepley ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 251003bc72f1SMatthew Knepley ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 251103bc72f1SMatthew Knepley ierr = VecDestroy(diagV);CHKERRQ(ierr); 251203bc72f1SMatthew Knepley ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 251303bc72f1SMatthew Knepley ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 251403bc72f1SMatthew Knepley PetscFunctionReturn(0); 251503bc72f1SMatthew Knepley } 251603bc72f1SMatthew Knepley 25175494a064SHong Zhang #undef __FUNCT__ 2518c87e5d42SMatthew Knepley #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2519c87e5d42SMatthew Knepley PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2520c87e5d42SMatthew Knepley { 2521c87e5d42SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2522c87e5d42SMatthew Knepley PetscInt n = A->rmap->n; 2523c87e5d42SMatthew Knepley PetscInt cstart = A->cmap->rstart; 2524c87e5d42SMatthew Knepley PetscInt *cmap = mat->garray; 2525c87e5d42SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 2526c87e5d42SMatthew Knepley Vec diagV, offdiagV; 2527c87e5d42SMatthew Knepley PetscScalar *a, *diagA, *offdiagA; 2528c87e5d42SMatthew Knepley PetscInt r; 2529c87e5d42SMatthew Knepley PetscErrorCode ierr; 2530c87e5d42SMatthew Knepley 2531c87e5d42SMatthew Knepley PetscFunctionBegin; 2532c87e5d42SMatthew Knepley ierr = 
PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2533c87e5d42SMatthew Knepley ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2534c87e5d42SMatthew Knepley ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2535c87e5d42SMatthew Knepley ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2536c87e5d42SMatthew Knepley ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2537c87e5d42SMatthew Knepley ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2538c87e5d42SMatthew Knepley ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2539c87e5d42SMatthew Knepley ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2540c87e5d42SMatthew Knepley for(r = 0; r < n; ++r) { 2541c87e5d42SMatthew Knepley if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2542c87e5d42SMatthew Knepley a[r] = diagA[r]; 2543c87e5d42SMatthew Knepley idx[r] = cstart + diagIdx[r]; 2544c87e5d42SMatthew Knepley } else { 2545c87e5d42SMatthew Knepley a[r] = offdiagA[r]; 2546c87e5d42SMatthew Knepley idx[r] = cmap[offdiagIdx[r]]; 2547c87e5d42SMatthew Knepley } 2548c87e5d42SMatthew Knepley } 2549c87e5d42SMatthew Knepley ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2550c87e5d42SMatthew Knepley ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2551c87e5d42SMatthew Knepley ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2552c87e5d42SMatthew Knepley ierr = VecDestroy(diagV);CHKERRQ(ierr); 2553c87e5d42SMatthew Knepley ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2554c87e5d42SMatthew Knepley ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2555c87e5d42SMatthew Knepley PetscFunctionReturn(0); 2556c87e5d42SMatthew Knepley } 2557c87e5d42SMatthew Knepley 2558c87e5d42SMatthew Knepley #undef __FUNCT__ 2559829201f2SHong Zhang #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2560f6d58c54SBarry Smith PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 25615494a064SHong Zhang { 25625494a064SHong Zhang 
extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
/*
   Operation table for the MPIAIJ matrix type.

   The entries are POSITIONAL: slot N corresponds to the Nth member of struct
   _MatOps (the slot numbers are noted in the comments below), so entries must
   never be reordered; 0 marks an operation this type does not implement.
   Some slots are filled conditionally depending on optional packages
   (PETSC_HAVE_PBGL, PETSC_HAVE_ADIC).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};
/*114*/MatGetSeqNonzerostructure_MPIAIJ, 2705bd0c2dcbSBarry Smith 0, 2706bd0c2dcbSBarry Smith 0, 2707bd0c2dcbSBarry Smith 0, 2708bd0c2dcbSBarry Smith 0, 2709bd0c2dcbSBarry Smith 0 2710bd0c2dcbSBarry Smith }; 271136ce4990SBarry Smith 27122e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/ 27132e8a6d31SBarry Smith 2714fb2e594dSBarry Smith EXTERN_C_BEGIN 27154a2ae208SSatish Balay #undef __FUNCT__ 27164a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIAIJ" 2717be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 27182e8a6d31SBarry Smith { 27192e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2720dfbe8321SBarry Smith PetscErrorCode ierr; 27212e8a6d31SBarry Smith 27222e8a6d31SBarry Smith PetscFunctionBegin; 27232e8a6d31SBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 27242e8a6d31SBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 27252e8a6d31SBarry Smith PetscFunctionReturn(0); 27262e8a6d31SBarry Smith } 2727fb2e594dSBarry Smith EXTERN_C_END 27282e8a6d31SBarry Smith 2729fb2e594dSBarry Smith EXTERN_C_BEGIN 27304a2ae208SSatish Balay #undef __FUNCT__ 27314a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2732be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 27332e8a6d31SBarry Smith { 27342e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2735dfbe8321SBarry Smith PetscErrorCode ierr; 27362e8a6d31SBarry Smith 27372e8a6d31SBarry Smith PetscFunctionBegin; 27382e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 27392e8a6d31SBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 27402e8a6d31SBarry Smith PetscFunctionReturn(0); 27412e8a6d31SBarry Smith } 2742fb2e594dSBarry Smith EXTERN_C_END 27438a729477SBarry Smith 2744e090d566SSatish Balay #include "petscpc.h" 274527508adbSBarry Smith EXTERN_C_BEGIN 27464a2ae208SSatish Balay #undef __FUNCT__ 
2747a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2748be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2749a23d5eceSKris Buschelman { 2750a23d5eceSKris Buschelman Mat_MPIAIJ *b; 2751dfbe8321SBarry Smith PetscErrorCode ierr; 2752b1d57f15SBarry Smith PetscInt i; 2753a23d5eceSKris Buschelman 2754a23d5eceSKris Buschelman PetscFunctionBegin; 2755a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2756a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 275777431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 275877431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2759899cda47SBarry Smith 27607408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 27617408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2762d0f46423SBarry Smith ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2763d0f46423SBarry Smith ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2764a23d5eceSKris Buschelman if (d_nnz) { 2765d0f46423SBarry Smith for (i=0; i<B->rmap->n; i++) { 276677431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2767a23d5eceSKris Buschelman } 2768a23d5eceSKris Buschelman } 2769a23d5eceSKris Buschelman if (o_nnz) { 2770d0f46423SBarry Smith for (i=0; i<B->rmap->n; i++) { 277177431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2772a23d5eceSKris Buschelman } 2773a23d5eceSKris Buschelman } 2774a23d5eceSKris Buschelman b = (Mat_MPIAIJ*)B->data; 2775899cda47SBarry Smith 2776526dfc15SBarry Smith if (!B->preallocated) { 2777899cda47SBarry 
Smith /* Explicitly create 2 MATSEQAIJ matrices. */ 2778899cda47SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2779d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2780899cda47SBarry Smith ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2781899cda47SBarry Smith ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2782899cda47SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2783d0f46423SBarry Smith ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2784899cda47SBarry Smith ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2785899cda47SBarry Smith ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2786526dfc15SBarry Smith } 2787899cda47SBarry Smith 2788c60e587dSKris Buschelman ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2789c60e587dSKris Buschelman ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2790526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2791a23d5eceSKris Buschelman PetscFunctionReturn(0); 2792a23d5eceSKris Buschelman } 2793a23d5eceSKris Buschelman EXTERN_C_END 2794a23d5eceSKris Buschelman 27954a2ae208SSatish Balay #undef __FUNCT__ 27964a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIAIJ" 2797dfbe8321SBarry Smith PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2798d6dfbf8fSBarry Smith { 2799d6dfbf8fSBarry Smith Mat mat; 2800416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2801dfbe8321SBarry Smith PetscErrorCode ierr; 2802d6dfbf8fSBarry Smith 28033a40ed3dSBarry Smith PetscFunctionBegin; 2804416022c9SBarry Smith *newmat = 0; 28057adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2806d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 28077adad957SLisandro Dalcin ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - creates a new MPIAIJ matrix with the same layout and
   structure as matin; cpvalues controls whether the numerical values of the
   two sequential blocks are copied as well (passed through to MatDuplicate
   on oldmat->A and oldmat->B).

   The copy order is significant: the Mat must be created and typed before its
   Mat_MPIAIJ data is touched, and colmap/garray must be in place before the
   blocks and scatter context are duplicated.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  /* create an empty matrix of the same communicator, sizes and type; MatSetType
     installs a fresh Mat_MPIAIJ in mat->data which is then filled in below */
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any per-object overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;       /* the duplicate is born assembled */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object MatGetRow() scratch state starts out empty */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* duplicate the row/column layouts and the stash used by MatSetValues */
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* colmap: global column -> local off-diagonal column lookup (table or array,
     depending on how PETSc was configured) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: local off-diagonal column -> global column map */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the work vector and scatter context used by MatMult, then the
     two sequential blocks (values copied or not according to cpvalues) */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  /* carry over any composed functions (e.g. MatStoreValues) */
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 28717e042019SMatthew Knepley PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2872b1d57f15SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2873910ba992SMatthew Knepley PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2874dc231df0SBarry Smith PetscInt cend,cstart,n,*rowners; 2875b1d57f15SBarry Smith int fd; 2876416022c9SBarry Smith 28773a40ed3dSBarry Smith PetscFunctionBegin; 28781dab6e02SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 28791dab6e02SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 288017699dbbSLois Curfman McInnes if (!rank) { 2881b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 28820752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2883552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 28846c5fab8fSBarry Smith } 28856c5fab8fSBarry Smith 2886b1d57f15SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887416022c9SBarry Smith M = header[1]; N = header[2]; 2888416022c9SBarry Smith /* determine ownership of all rows */ 288929cdbbc8SSatish Balay m = M/size + ((M % size) > rank); 2890dc231df0SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2891dc231df0SBarry Smith ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892167e7480SBarry Smith 2893167e7480SBarry Smith /* First process needs enough room for process with most rows */ 2894167e7480SBarry Smith if (!rank) { 2895167e7480SBarry Smith mmax = rowners[1]; 2896167e7480SBarry Smith for (i=2; i<size; i++) { 2897167e7480SBarry Smith mmax = PetscMax(mmax,rowners[i]); 2898167e7480SBarry Smith } 2899167e7480SBarry Smith } else mmax = m; 2900167e7480SBarry Smith 2901416022c9SBarry Smith rowners[0] = 0; 290217699dbbSLois Curfman McInnes for (i=2; 
i<=size; i++) { 2903416022c9SBarry Smith rowners[i] += rowners[i-1]; 2904416022c9SBarry Smith } 290517699dbbSLois Curfman McInnes rstart = rowners[rank]; 290617699dbbSLois Curfman McInnes rend = rowners[rank+1]; 2907416022c9SBarry Smith 2908416022c9SBarry Smith /* distribute row lengths to all processors */ 2909167e7480SBarry Smith ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 291017699dbbSLois Curfman McInnes if (!rank) { 2911dc231df0SBarry Smith ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913b1d57f15SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2914b1d57f15SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2915dc231df0SBarry Smith for (j=0; j<m; j++) { 2916dc231df0SBarry Smith procsnz[0] += ourlens[j]; 2917dc231df0SBarry Smith } 2918dc231df0SBarry Smith for (i=1; i<size; i++) { 2919dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2920dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2921dc231df0SBarry Smith for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2922416022c9SBarry Smith procsnz[i] += rowlengths[j]; 2923416022c9SBarry Smith } 292413980483SBarry Smith mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 292513980483SBarry Smith ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926416022c9SBarry Smith } 2927606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928dc231df0SBarry Smith } else { 292913980483SBarry Smith mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 293013980483SBarry Smith ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2931dc231df0SBarry Smith } 2932416022c9SBarry Smith 2933dc231df0SBarry Smith if (!rank) { 2934416022c9SBarry Smith /* determine max buffer needed and allocate it */ 2935416022c9SBarry 
Smith maxnz = 0; 29368a8e0b3aSBarry Smith for (i=0; i<size; i++) { 29370452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 2938416022c9SBarry Smith } 2939b1d57f15SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2940416022c9SBarry Smith 2941416022c9SBarry Smith /* read in my part of the matrix column indices */ 2942416022c9SBarry Smith nz = procsnz[0]; 2943b1d57f15SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 29440752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945d65a2f8fSBarry Smith 2946d65a2f8fSBarry Smith /* read in every one elses and ship off */ 294717699dbbSLois Curfman McInnes for (i=1; i<size; i++) { 2948d65a2f8fSBarry Smith nz = procsnz[i]; 29490752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 295013980483SBarry Smith mpicnt = PetscMPIIntCast(nz); 295113980483SBarry Smith ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952d65a2f8fSBarry Smith } 2953606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 29543a40ed3dSBarry Smith } else { 2955416022c9SBarry Smith /* determine buffer space needed for message */ 2956416022c9SBarry Smith nz = 0; 2957416022c9SBarry Smith for (i=0; i<m; i++) { 2958416022c9SBarry Smith nz += ourlens[i]; 2959416022c9SBarry Smith } 2960dc231df0SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2961416022c9SBarry Smith 2962416022c9SBarry Smith /* receive message of column indices*/ 296313980483SBarry Smith mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 296413980483SBarry Smith ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 296513980483SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 29667c533972SBarry Smith if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 296713980483SBarry Smith else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() 
returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 296813980483SBarry Smith else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2969416022c9SBarry Smith } 2970416022c9SBarry Smith 2971b362ba68SBarry Smith /* determine column ownership if matrix is not square */ 2972b362ba68SBarry Smith if (N != M) { 2973b362ba68SBarry Smith n = N/size + ((N % size) > rank); 2974b1d57f15SBarry Smith ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975b362ba68SBarry Smith cstart = cend - n; 2976b362ba68SBarry Smith } else { 2977b362ba68SBarry Smith cstart = rstart; 2978b362ba68SBarry Smith cend = rend; 2979fb2e594dSBarry Smith n = cend - cstart; 2980b362ba68SBarry Smith } 2981b362ba68SBarry Smith 2982416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 2983b1d57f15SBarry Smith ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984416022c9SBarry Smith jj = 0; 2985416022c9SBarry Smith for (i=0; i<m; i++) { 2986416022c9SBarry Smith for (j=0; j<ourlens[i]; j++) { 2987b362ba68SBarry Smith if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988416022c9SBarry Smith jj++; 2989416022c9SBarry Smith } 2990416022c9SBarry Smith } 2991d65a2f8fSBarry Smith 2992d65a2f8fSBarry Smith /* create our matrix */ 2993416022c9SBarry Smith for (i=0; i<m; i++) { 2994416022c9SBarry Smith ourlens[i] -= offlens[i]; 2995416022c9SBarry Smith } 2996f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2997f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2998d10c748bSKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr); 2999d10c748bSKris Buschelman ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3000d10c748bSKris Buschelman 3001d65a2f8fSBarry Smith for (i=0; i<m; i++) { 3002d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 3003d65a2f8fSBarry Smith } 3004416022c9SBarry Smith 
300517699dbbSLois Curfman McInnes if (!rank) { 3006906b51c7SHong Zhang ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3007416022c9SBarry Smith 3008416022c9SBarry Smith /* read in my part of the matrix numerical values */ 3009416022c9SBarry Smith nz = procsnz[0]; 30100752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011d65a2f8fSBarry Smith 3012d65a2f8fSBarry Smith /* insert into matrix */ 3013d65a2f8fSBarry Smith jj = rstart; 3014d65a2f8fSBarry Smith smycols = mycols; 3015d65a2f8fSBarry Smith svals = vals; 3016d65a2f8fSBarry Smith for (i=0; i<m; i++) { 3017dc231df0SBarry Smith ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3018d65a2f8fSBarry Smith smycols += ourlens[i]; 3019d65a2f8fSBarry Smith svals += ourlens[i]; 3020d65a2f8fSBarry Smith jj++; 3021416022c9SBarry Smith } 3022416022c9SBarry Smith 3023d65a2f8fSBarry Smith /* read in other processors and ship out */ 302417699dbbSLois Curfman McInnes for (i=1; i<size; i++) { 3025416022c9SBarry Smith nz = procsnz[i]; 30260752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 302713980483SBarry Smith mpicnt = PetscMPIIntCast(nz); 302813980483SBarry Smith ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3029416022c9SBarry Smith } 3030606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 30313a40ed3dSBarry Smith } else { 3032d65a2f8fSBarry Smith /* receive numeric values */ 303387828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3034416022c9SBarry Smith 3035d65a2f8fSBarry Smith /* receive message of values*/ 303613980483SBarry Smith mpicnt = PetscMPIIntCast(nz); 303713980483SBarry Smith ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 303813980483SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 30397c533972SBarry Smith if (mpimaxnz == 
MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 304013980483SBarry Smith else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 304113980483SBarry Smith else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3042d65a2f8fSBarry Smith 3043d65a2f8fSBarry Smith /* insert into matrix */ 3044d65a2f8fSBarry Smith jj = rstart; 3045d65a2f8fSBarry Smith smycols = mycols; 3046d65a2f8fSBarry Smith svals = vals; 3047d65a2f8fSBarry Smith for (i=0; i<m; i++) { 3048dc231df0SBarry Smith ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3049d65a2f8fSBarry Smith smycols += ourlens[i]; 3050d65a2f8fSBarry Smith svals += ourlens[i]; 3051d65a2f8fSBarry Smith jj++; 3052d65a2f8fSBarry Smith } 3053d65a2f8fSBarry Smith } 3054dc231df0SBarry Smith ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3055606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 3056606d414cSSatish Balay ierr = PetscFree(mycols);CHKERRQ(ierr); 3057606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 3058d65a2f8fSBarry Smith 30596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061d10c748bSKris Buschelman *newmat = A; 30623a40ed3dSBarry Smith PetscFunctionReturn(0); 3063416022c9SBarry Smith } 3064a0ff6018SBarry Smith 30654a2ae208SSatish Balay #undef __FUNCT__ 30664a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 30674aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 30684aa3045dSJed Brown { 30694aa3045dSJed Brown PetscErrorCode ierr; 30704aa3045dSJed Brown IS iscol_local; 30714aa3045dSJed Brown PetscInt csize; 30724aa3045dSJed Brown 30734aa3045dSJed Brown 
PetscFunctionBegin; 30744aa3045dSJed Brown ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3075b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 3076b79d0421SJed Brown ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3077b79d0421SJed Brown if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3078b79d0421SJed Brown } else { 30794aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3080b79d0421SJed Brown } 30814aa3045dSJed Brown ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3082b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 3083b79d0421SJed Brown ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 30844aa3045dSJed Brown ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3085b79d0421SJed Brown } 30864aa3045dSJed Brown PetscFunctionReturn(0); 30874aa3045dSJed Brown } 30884aa3045dSJed Brown 30894aa3045dSJed Brown #undef __FUNCT__ 30904aa3045dSJed Brown #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3091a0ff6018SBarry Smith /* 309229da9460SBarry Smith Not great since it makes two copies of the submatrix, first an SeqAIJ 309329da9460SBarry Smith in local and then by concatenating the local matrices the end result. 309429da9460SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 30954aa3045dSJed Brown 30964aa3045dSJed Brown Note: This requires a sequential iscol with all indices. 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the previous call was composed onto *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* single allocation, olens aliases the second half */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy the rows of the sequential submatrix into the parallel result */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
*d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3225ccd8e176SBarry Smith const PetscInt *JJ; 3226ccd8e176SBarry Smith PetscScalar *values; 3227ccd8e176SBarry Smith PetscErrorCode ierr; 3228ccd8e176SBarry Smith 3229ccd8e176SBarry Smith PetscFunctionBegin; 3230b7940d39SSatish Balay if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3231899cda47SBarry Smith 32327408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 32337408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3234d0f46423SBarry Smith ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3235d0f46423SBarry Smith ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3236d0f46423SBarry Smith m = B->rmap->n; 3237d0f46423SBarry Smith cstart = B->cmap->rstart; 3238d0f46423SBarry Smith cend = B->cmap->rend; 3239d0f46423SBarry Smith rstart = B->rmap->rstart; 3240899cda47SBarry Smith 3241ccd8e176SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3242ccd8e176SBarry Smith o_nnz = d_nnz + m; 3243ccd8e176SBarry Smith 3244ecc77c7aSBarry Smith #if defined(PETSC_USE_DEBUGGING) 3245ecc77c7aSBarry Smith for (i=0; i<m; i++) { 3246ecc77c7aSBarry Smith nnz = Ii[i+1]- Ii[i]; 3247ecc77c7aSBarry Smith JJ = J + Ii[i]; 3248ecc77c7aSBarry Smith if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3249ecc77c7aSBarry Smith if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3250d0f46423SBarry Smith if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3251ecc77c7aSBarry Smith for (j=1; j<nnz; j++) { 3252ecc77c7aSBarry Smith if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3253ecc77c7aSBarry Smith } 3254ecc77c7aSBarry Smith } 3255ecc77c7aSBarry Smith #endif 
3256ecc77c7aSBarry Smith 3257ccd8e176SBarry Smith for (i=0; i<m; i++) { 3258b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 3259b7940d39SSatish Balay JJ = J + Ii[i]; 3260ccd8e176SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 3261ccd8e176SBarry Smith for (j=0; j<nnz; j++) { 3262ccd8e176SBarry Smith if (*JJ >= cstart) break; 3263ccd8e176SBarry Smith JJ++; 3264ccd8e176SBarry Smith } 3265ccd8e176SBarry Smith d = 0; 3266ccd8e176SBarry Smith for (; j<nnz; j++) { 3267ccd8e176SBarry Smith if (*JJ++ >= cend) break; 3268ccd8e176SBarry Smith d++; 3269ccd8e176SBarry Smith } 3270ccd8e176SBarry Smith d_nnz[i] = d; 3271ccd8e176SBarry Smith o_nnz[i] = nnz - d; 3272ccd8e176SBarry Smith } 3273ccd8e176SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3274ccd8e176SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3275ccd8e176SBarry Smith 3276ccd8e176SBarry Smith if (v) values = (PetscScalar*)v; 3277ccd8e176SBarry Smith else { 3278ccd8e176SBarry Smith ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3279ccd8e176SBarry Smith ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3280ccd8e176SBarry Smith } 3281ccd8e176SBarry Smith 3282ccd8e176SBarry Smith for (i=0; i<m; i++) { 3283ccd8e176SBarry Smith ii = i + rstart; 3284b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 3285b7940d39SSatish Balay ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3286ccd8e176SBarry Smith } 3287ccd8e176SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288ccd8e176SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289ccd8e176SBarry Smith 3290ccd8e176SBarry Smith if (!v) { 3291ccd8e176SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 3292ccd8e176SBarry Smith } 3293ccd8e176SBarry Smith PetscFunctionReturn(0); 3294ccd8e176SBarry Smith } 3295e2e86b8fSSatish Balay EXTERN_C_END 3296ccd8e176SBarry Smith 3297ccd8e176SBarry Smith #undef __FUNCT__ 3298ccd8e176SBarry Smith #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 32991eea217eSSatish Balay /*@ 3300ccd8e176SBarry Smith MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3301ccd8e176SBarry Smith (the default parallel PETSc format). 3302ccd8e176SBarry Smith 3303ccd8e176SBarry Smith Collective on MPI_Comm 3304ccd8e176SBarry Smith 3305ccd8e176SBarry Smith Input Parameters: 3306a1661176SMatthew Knepley + B - the matrix 3307ccd8e176SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 3308ccd8e176SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 3309ccd8e176SBarry Smith - v - optional values in the matrix 3310ccd8e176SBarry Smith 3311ccd8e176SBarry Smith Level: developer 3312ccd8e176SBarry Smith 331312251496SSatish Balay Notes: 331412251496SSatish Balay The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 331512251496SSatish Balay thus you CANNOT change the matrix entries by changing the values of a[] after you have 331612251496SSatish Balay called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 331712251496SSatish Balay 331812251496SSatish Balay The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 6]
        v =  {1,2,3}  [size = nz = 6]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 6]
        v =  {4,5,6}  [size = nz = 6]

       The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on B; if the
     matrix type registered no implementation this is silently a no-op */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3478273d9f13SBarry Smith 3479273d9f13SBarry Smith Level: intermediate 3480273d9f13SBarry Smith 3481273d9f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3482273d9f13SBarry Smith 3483ccd8e176SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3484aa95bbe8SBarry Smith MPIAIJ, MatGetInfo() 3485273d9f13SBarry Smith @*/ 3486be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3487273d9f13SBarry Smith { 3488b1d57f15SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3489273d9f13SBarry Smith 3490273d9f13SBarry Smith PetscFunctionBegin; 3491a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3492a23d5eceSKris Buschelman if (f) { 3493a23d5eceSKris Buschelman ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3494273d9f13SBarry Smith } 3495273d9f13SBarry Smith PetscFunctionReturn(0); 3496273d9f13SBarry Smith } 3497273d9f13SBarry Smith 34984a2ae208SSatish Balay #undef __FUNCT__ 34992fb0ec9aSBarry Smith #define __FUNCT__ "MatCreateMPIAIJWithArrays" 350058d36128SBarry Smith /*@ 35012fb0ec9aSBarry Smith MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 35022fb0ec9aSBarry Smith CSR format the local rows. 35032fb0ec9aSBarry Smith 35042fb0ec9aSBarry Smith Collective on MPI_Comm 35052fb0ec9aSBarry Smith 35062fb0ec9aSBarry Smith Input Parameters: 35072fb0ec9aSBarry Smith + comm - MPI communicator 35082fb0ec9aSBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 35092fb0ec9aSBarry Smith . n - This value should be the same as the local size used in creating the 35102fb0ec9aSBarry Smith x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 35112fb0ec9aSBarry Smith calculated if N is given) For square matrices n is almost always m. 35122fb0ec9aSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 35132fb0ec9aSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 35142fb0ec9aSBarry Smith . i - row indices 35152fb0ec9aSBarry Smith . j - column indices 35162fb0ec9aSBarry Smith - a - matrix values 35172fb0ec9aSBarry Smith 35182fb0ec9aSBarry Smith Output Parameter: 35192fb0ec9aSBarry Smith . mat - the matrix 352003bfb495SBarry Smith 35212fb0ec9aSBarry Smith Level: intermediate 35222fb0ec9aSBarry Smith 35232fb0ec9aSBarry Smith Notes: 35242fb0ec9aSBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 35252fb0ec9aSBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 35268d7a6e47SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 35272fb0ec9aSBarry Smith 352812251496SSatish Balay The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 352912251496SSatish Balay 353012251496SSatish Balay The format which is used for the sparse matrix input, is equivalent to a 353112251496SSatish Balay row-major ordering.. 
i.e for the following matrix, the input data expected is 353212251496SSatish Balay as shown: 353312251496SSatish Balay 353412251496SSatish Balay 1 0 0 353512251496SSatish Balay 2 0 3 P0 353612251496SSatish Balay ------- 353712251496SSatish Balay 4 5 6 P1 353812251496SSatish Balay 353912251496SSatish Balay Process0 [P0]: rows_owned=[0,1] 354012251496SSatish Balay i = {0,1,3} [size = nrow+1 = 2+1] 354112251496SSatish Balay j = {0,0,2} [size = nz = 6] 354212251496SSatish Balay v = {1,2,3} [size = nz = 6] 354312251496SSatish Balay 354412251496SSatish Balay Process1 [P1]: rows_owned=[2] 354512251496SSatish Balay i = {0,3} [size = nrow+1 = 1+1] 354612251496SSatish Balay j = {0,1,2} [size = nz = 6] 354712251496SSatish Balay v = {4,5,6} [size = nz = 6] 35482fb0ec9aSBarry Smith 35492fb0ec9aSBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 35502fb0ec9aSBarry Smith 35512fb0ec9aSBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 35528d7a6e47SBarry Smith MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 35532fb0ec9aSBarry Smith @*/ 355482b90586SSatish Balay PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 35552fb0ec9aSBarry Smith { 35562fb0ec9aSBarry Smith PetscErrorCode ierr; 35572fb0ec9aSBarry Smith 35582fb0ec9aSBarry Smith PetscFunctionBegin; 35592fb0ec9aSBarry Smith if (i[0]) { 35602fb0ec9aSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 35612fb0ec9aSBarry Smith } 35622fb0ec9aSBarry Smith if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 35632fb0ec9aSBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3564d4146a68SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 35652fb0ec9aSBarry Smith ierr = 
MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 35662fb0ec9aSBarry Smith ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 35672fb0ec9aSBarry Smith PetscFunctionReturn(0); 35682fb0ec9aSBarry Smith } 35692fb0ec9aSBarry Smith 35702fb0ec9aSBarry Smith #undef __FUNCT__ 35714a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIAIJ" 3572273d9f13SBarry Smith /*@C 3573273d9f13SBarry Smith MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3574273d9f13SBarry Smith (the default parallel PETSc format). For good matrix assembly performance 3575273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 3576273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3577273d9f13SBarry Smith performance can be increased by more than a factor of 50. 3578273d9f13SBarry Smith 3579273d9f13SBarry Smith Collective on MPI_Comm 3580273d9f13SBarry Smith 3581273d9f13SBarry Smith Input Parameters: 3582273d9f13SBarry Smith + comm - MPI communicator 3583273d9f13SBarry Smith . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3584273d9f13SBarry Smith This value should be the same as the local size used in creating the 3585273d9f13SBarry Smith y vector for the matrix-vector product y = Ax. 3586273d9f13SBarry Smith . n - This value should be the same as the local size used in creating the 3587273d9f13SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3588273d9f13SBarry Smith calculated if N is given) For square matrices n is almost always m. 3589273d9f13SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3590273d9f13SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3591273d9f13SBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3592273d9f13SBarry Smith (same value is used for all local rows) 3593273d9f13SBarry Smith . 
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor than it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3737273d9f13SBarry Smith 3738273d9f13SBarry Smith Level: intermediate 3739273d9f13SBarry Smith 3740273d9f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3741273d9f13SBarry Smith 3742ccd8e176SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 37432fb0ec9aSBarry Smith MPIAIJ, MatCreateMPIAIJWithArrays() 3744273d9f13SBarry Smith @*/ 3745be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3746273d9f13SBarry Smith { 37476849ba73SBarry Smith PetscErrorCode ierr; 3748b1d57f15SBarry Smith PetscMPIInt size; 3749273d9f13SBarry Smith 3750273d9f13SBarry Smith PetscFunctionBegin; 3751f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3752f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3753273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3754273d9f13SBarry Smith if (size > 1) { 3755273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3756273d9f13SBarry Smith ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3757273d9f13SBarry Smith } else { 3758273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3759273d9f13SBarry Smith ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3760273d9f13SBarry Smith } 3761273d9f13SBarry Smith PetscFunctionReturn(0); 3762273d9f13SBarry Smith } 3763195d93cdSBarry Smith 37644a2ae208SSatish Balay #undef __FUNCT__ 37654a2ae208SSatish Balay #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3766be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3767195d93cdSBarry Smith { 3768195d93cdSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3769b1d57f15SBarry Smith 3770195d93cdSBarry Smith PetscFunctionBegin; 
3771195d93cdSBarry Smith *Ad = a->A; 3772195d93cdSBarry Smith *Ao = a->B; 3773195d93cdSBarry Smith *colmap = a->garray; 3774195d93cdSBarry Smith PetscFunctionReturn(0); 3775195d93cdSBarry Smith } 3776a2243be0SBarry Smith 3777a2243be0SBarry Smith #undef __FUNCT__ 3778a2243be0SBarry Smith #define __FUNCT__ "MatSetColoring_MPIAIJ" 3779dfbe8321SBarry Smith PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3780a2243be0SBarry Smith { 3781dfbe8321SBarry Smith PetscErrorCode ierr; 3782b1d57f15SBarry Smith PetscInt i; 3783a2243be0SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3784a2243be0SBarry Smith 3785a2243be0SBarry Smith PetscFunctionBegin; 37868ee2e534SBarry Smith if (coloring->ctype == IS_COLORING_GLOBAL) { 378708b6dcc0SBarry Smith ISColoringValue *allcolors,*colors; 3788a2243be0SBarry Smith ISColoring ocoloring; 3789a2243be0SBarry Smith 3790a2243be0SBarry Smith /* set coloring for diagonal portion */ 3791a2243be0SBarry Smith ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3792a2243be0SBarry Smith 3793a2243be0SBarry Smith /* set coloring for off-diagonal portion */ 37947adad957SLisandro Dalcin ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3795d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3796d0f46423SBarry Smith for (i=0; i<a->B->cmap->n; i++) { 3797a2243be0SBarry Smith colors[i] = allcolors[a->garray[i]]; 3798a2243be0SBarry Smith } 3799a2243be0SBarry Smith ierr = PetscFree(allcolors);CHKERRQ(ierr); 3800d0f46423SBarry Smith ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3801a2243be0SBarry Smith ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3802a2243be0SBarry Smith ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3803a2243be0SBarry Smith } else if (coloring->ctype == IS_COLORING_GHOSTED) { 380408b6dcc0SBarry Smith ISColoringValue *colors; 
3805b1d57f15SBarry Smith PetscInt *larray; 3806a2243be0SBarry Smith ISColoring ocoloring; 3807a2243be0SBarry Smith 3808a2243be0SBarry Smith /* set coloring for diagonal portion */ 3809d0f46423SBarry Smith ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3810d0f46423SBarry Smith for (i=0; i<a->A->cmap->n; i++) { 3811d0f46423SBarry Smith larray[i] = i + A->cmap->rstart; 3812a2243be0SBarry Smith } 3813d0f46423SBarry Smith ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3814d0f46423SBarry Smith ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3815d0f46423SBarry Smith for (i=0; i<a->A->cmap->n; i++) { 3816a2243be0SBarry Smith colors[i] = coloring->colors[larray[i]]; 3817a2243be0SBarry Smith } 3818a2243be0SBarry Smith ierr = PetscFree(larray);CHKERRQ(ierr); 3819d0f46423SBarry Smith ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3820a2243be0SBarry Smith ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3821a2243be0SBarry Smith ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3822a2243be0SBarry Smith 3823a2243be0SBarry Smith /* set coloring for off-diagonal portion */ 3824d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3825d0f46423SBarry Smith ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3826d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3827d0f46423SBarry Smith for (i=0; i<a->B->cmap->n; i++) { 3828a2243be0SBarry Smith colors[i] = coloring->colors[larray[i]]; 3829a2243be0SBarry Smith } 3830a2243be0SBarry Smith ierr = PetscFree(larray);CHKERRQ(ierr); 3831d0f46423SBarry Smith ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3832a2243be0SBarry Smith ierr 
= MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3833a2243be0SBarry Smith ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3834a2243be0SBarry Smith } else { 383577431f27SBarry Smith SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3836a2243be0SBarry Smith } 3837a2243be0SBarry Smith 3838a2243be0SBarry Smith PetscFunctionReturn(0); 3839a2243be0SBarry Smith } 3840a2243be0SBarry Smith 3841dcf5cc72SBarry Smith #if defined(PETSC_HAVE_ADIC) 3842a2243be0SBarry Smith #undef __FUNCT__ 3843779c1a83SBarry Smith #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3844dfbe8321SBarry Smith PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3845a2243be0SBarry Smith { 3846a2243be0SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3847dfbe8321SBarry Smith PetscErrorCode ierr; 3848a2243be0SBarry Smith 3849a2243be0SBarry Smith PetscFunctionBegin; 3850779c1a83SBarry Smith ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3851779c1a83SBarry Smith ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3852779c1a83SBarry Smith PetscFunctionReturn(0); 3853779c1a83SBarry Smith } 3854dcf5cc72SBarry Smith #endif 3855779c1a83SBarry Smith 3856779c1a83SBarry Smith #undef __FUNCT__ 3857779c1a83SBarry Smith #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3858b1d57f15SBarry Smith PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3859779c1a83SBarry Smith { 3860779c1a83SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3861dfbe8321SBarry Smith PetscErrorCode ierr; 3862779c1a83SBarry Smith 3863779c1a83SBarry Smith PetscFunctionBegin; 3864779c1a83SBarry Smith ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3865779c1a83SBarry Smith ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3866a2243be0SBarry Smith PetscFunctionReturn(0); 3867a2243be0SBarry Smith } 3868c5d6d63eSBarry Smith 3869c5d6d63eSBarry Smith #undef __FUNCT__ 387051dd7536SBarry Smith #define __FUNCT__ "MatMerge" 3871bc08b0f1SBarry 
Smith /*@ 387251dd7536SBarry Smith MatMerge - Creates a single large PETSc matrix by concatinating sequential 387351dd7536SBarry Smith matrices from each processor 3874c5d6d63eSBarry Smith 3875c5d6d63eSBarry Smith Collective on MPI_Comm 3876c5d6d63eSBarry Smith 3877c5d6d63eSBarry Smith Input Parameters: 387851dd7536SBarry Smith + comm - the communicators the parallel matrix will live on 3879d6bb3c2dSHong Zhang . inmat - the input sequential matrices 38800e36024fSHong Zhang . n - number of local columns (or PETSC_DECIDE) 3881d6bb3c2dSHong Zhang - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 388251dd7536SBarry Smith 388351dd7536SBarry Smith Output Parameter: 388451dd7536SBarry Smith . outmat - the parallel matrix generated 3885c5d6d63eSBarry Smith 38867e25d530SSatish Balay Level: advanced 38877e25d530SSatish Balay 3888f08fae4eSHong Zhang Notes: The number of columns of the matrix in EACH processor MUST be the same. 3889c5d6d63eSBarry Smith 3890c5d6d63eSBarry Smith @*/ 3891be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3892c5d6d63eSBarry Smith { 3893dfbe8321SBarry Smith PetscErrorCode ierr; 3894b7940d39SSatish Balay PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3895ba8c8a56SBarry Smith PetscInt *indx; 3896ba8c8a56SBarry Smith PetscScalar *values; 3897c5d6d63eSBarry Smith 3898c5d6d63eSBarry Smith PetscFunctionBegin; 38990e36024fSHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3900d6bb3c2dSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 3901d6bb3c2dSHong Zhang /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 39020e36024fSHong Zhang if (n == PETSC_DECIDE){ 3903357abbc8SBarry Smith ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 39040e36024fSHong Zhang } 3905357abbc8SBarry Smith ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3906357abbc8SBarry Smith rstart -= m; 3907d6bb3c2dSHong Zhang 3908d6bb3c2dSHong Zhang ierr = 
MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3909d6bb3c2dSHong Zhang for (i=0;i<m;i++) { 3910ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3911d6bb3c2dSHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3912ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3913d6bb3c2dSHong Zhang } 3914d6bb3c2dSHong Zhang /* This routine will ONLY return MPIAIJ type matrix */ 3915f69a0ea3SMatthew Knepley ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3916f69a0ea3SMatthew Knepley ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3917d6bb3c2dSHong Zhang ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3918d6bb3c2dSHong Zhang ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3919d6bb3c2dSHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3920d6bb3c2dSHong Zhang 3921d6bb3c2dSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 3922d6bb3c2dSHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3923d6bb3c2dSHong Zhang } else { 392477431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3925d6bb3c2dSHong Zhang } 3926d6bb3c2dSHong Zhang 3927d6bb3c2dSHong Zhang for (i=0;i<m;i++) { 3928ba8c8a56SBarry Smith ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3929b7940d39SSatish Balay Ii = i + rstart; 3930b7940d39SSatish Balay ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3931ba8c8a56SBarry Smith ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3932d6bb3c2dSHong Zhang } 3933d6bb3c2dSHong Zhang ierr = MatDestroy(inmat);CHKERRQ(ierr); 3934d6bb3c2dSHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935d6bb3c2dSHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393651dd7536SBarry Smith 3937c5d6d63eSBarry Smith PetscFunctionReturn(0); 
3938c5d6d63eSBarry Smith } 3939c5d6d63eSBarry Smith 3940c5d6d63eSBarry Smith #undef __FUNCT__ 3941c5d6d63eSBarry Smith #define __FUNCT__ "MatFileSplit" 3942dfbe8321SBarry Smith PetscErrorCode MatFileSplit(Mat A,char *outfile) 3943c5d6d63eSBarry Smith { 3944dfbe8321SBarry Smith PetscErrorCode ierr; 394532dcc486SBarry Smith PetscMPIInt rank; 3946b1d57f15SBarry Smith PetscInt m,N,i,rstart,nnz; 3947de4209c5SBarry Smith size_t len; 3948b1d57f15SBarry Smith const PetscInt *indx; 3949c5d6d63eSBarry Smith PetscViewer out; 3950c5d6d63eSBarry Smith char *name; 3951c5d6d63eSBarry Smith Mat B; 3952b3cc6726SBarry Smith const PetscScalar *values; 3953c5d6d63eSBarry Smith 3954c5d6d63eSBarry Smith PetscFunctionBegin; 3955c5d6d63eSBarry Smith ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3956c5d6d63eSBarry Smith ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3957f204ca49SKris Buschelman /* Should this be the type of the diagonal block of A? */ 3958f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3959f69a0ea3SMatthew Knepley ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3960f204ca49SKris Buschelman ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3961f204ca49SKris Buschelman ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3962c5d6d63eSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3963c5d6d63eSBarry Smith for (i=0;i<m;i++) { 3964c5d6d63eSBarry Smith ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3965c5d6d63eSBarry Smith ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3966c5d6d63eSBarry Smith ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3967c5d6d63eSBarry Smith } 3968c5d6d63eSBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3969c5d6d63eSBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3970c5d6d63eSBarry Smith 39717adad957SLisandro Dalcin ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3972c5d6d63eSBarry 
Smith ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3973c5d6d63eSBarry Smith ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3974c5d6d63eSBarry Smith sprintf(name,"%s.%d",outfile,rank); 3975852598b0SBarry Smith ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3976c5d6d63eSBarry Smith ierr = PetscFree(name); 3977c5d6d63eSBarry Smith ierr = MatView(B,out);CHKERRQ(ierr); 3978c5d6d63eSBarry Smith ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3979c5d6d63eSBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 3980c5d6d63eSBarry Smith PetscFunctionReturn(0); 3981c5d6d63eSBarry Smith } 3982e5f2cdd8SHong Zhang 398351a7d1a8SHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 398451a7d1a8SHong Zhang #undef __FUNCT__ 398551a7d1a8SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3986be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 398751a7d1a8SHong Zhang { 398851a7d1a8SHong Zhang PetscErrorCode ierr; 3989671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 3990776b82aeSLisandro Dalcin PetscContainer container; 399151a7d1a8SHong Zhang 399251a7d1a8SHong Zhang PetscFunctionBegin; 3993671beff6SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3994671beff6SHong Zhang if (container) { 3995776b82aeSLisandro Dalcin ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 399651a7d1a8SHong Zhang ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 39973e06a4e6SHong Zhang ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 39983e06a4e6SHong Zhang ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 399951a7d1a8SHong Zhang ierr = PetscFree(merge->bi);CHKERRQ(ierr); 400051a7d1a8SHong Zhang ierr = PetscFree(merge->bj);CHKERRQ(ierr); 400102c68681SHong Zhang ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 400202c68681SHong Zhang ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 400305b42c5fSBarry Smith ierr = 
PetscFree(merge->coi);CHKERRQ(ierr); 400405b42c5fSBarry Smith ierr = PetscFree(merge->coj);CHKERRQ(ierr); 400505b42c5fSBarry Smith ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 40062c72b5baSSatish Balay ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4007671beff6SHong Zhang 4008776b82aeSLisandro Dalcin ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4009671beff6SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4010671beff6SHong Zhang } 401151a7d1a8SHong Zhang ierr = PetscFree(merge);CHKERRQ(ierr); 401251a7d1a8SHong Zhang 401351a7d1a8SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 401451a7d1a8SHong Zhang PetscFunctionReturn(0); 401551a7d1a8SHong Zhang } 401651a7d1a8SHong Zhang 40177c4f633dSBarry Smith #include "../src/mat/utils/freespace.h" 4018be0fcf8dSHong Zhang #include "petscbt.h" 40194ebed01fSBarry Smith 4020e5f2cdd8SHong Zhang #undef __FUNCT__ 402138f152feSBarry Smith #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4022e5f2cdd8SHong Zhang /*@C 4023f08fae4eSHong Zhang MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4024e5f2cdd8SHong Zhang matrices from each processor 4025e5f2cdd8SHong Zhang 4026e5f2cdd8SHong Zhang Collective on MPI_Comm 4027e5f2cdd8SHong Zhang 4028e5f2cdd8SHong Zhang Input Parameters: 4029e5f2cdd8SHong Zhang + comm - the communicators the parallel matrix will live on 4030f08fae4eSHong Zhang . seqmat - the input sequential matrices 40310e36024fSHong Zhang . m - number of local rows (or PETSC_DECIDE) 40320e36024fSHong Zhang . n - number of local columns (or PETSC_DECIDE) 4033e5f2cdd8SHong Zhang - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4034e5f2cdd8SHong Zhang 4035e5f2cdd8SHong Zhang Output Parameter: 4036f08fae4eSHong Zhang . 
mpimat - the parallel matrix generated 4037e5f2cdd8SHong Zhang 4038e5f2cdd8SHong Zhang Level: advanced 4039e5f2cdd8SHong Zhang 4040affca5deSHong Zhang Notes: 4041affca5deSHong Zhang The dimensions of the sequential matrix in each processor MUST be the same. 4042affca5deSHong Zhang The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4043affca5deSHong Zhang destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4044e5f2cdd8SHong Zhang @*/ 4045be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 404655d1abb9SHong Zhang { 404755d1abb9SHong Zhang PetscErrorCode ierr; 40487adad957SLisandro Dalcin MPI_Comm comm=((PetscObject)mpimat)->comm; 404955d1abb9SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4050b1d57f15SBarry Smith PetscMPIInt size,rank,taga,*len_s; 4051d0f46423SBarry Smith PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4052b1d57f15SBarry Smith PetscInt proc,m; 4053b1d57f15SBarry Smith PetscInt **buf_ri,**buf_rj; 4054b1d57f15SBarry Smith PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4055b1d57f15SBarry Smith PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 405655d1abb9SHong Zhang MPI_Request *s_waits,*r_waits; 405755d1abb9SHong Zhang MPI_Status *status; 4058a77337e4SBarry Smith MatScalar *aa=a->a; 4059dd6ea824SBarry Smith MatScalar **abuf_r,*ba_i; 406055d1abb9SHong Zhang Mat_Merge_SeqsToMPI *merge; 4061776b82aeSLisandro Dalcin PetscContainer container; 406255d1abb9SHong Zhang 406355d1abb9SHong Zhang PetscFunctionBegin; 40644ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 40653c2c1871SHong Zhang 406655d1abb9SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 406755d1abb9SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 406855d1abb9SHong Zhang 406955d1abb9SHong Zhang ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 
407055d1abb9SHong Zhang if (container) { 4071776b82aeSLisandro Dalcin ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 407255d1abb9SHong Zhang } 407355d1abb9SHong Zhang bi = merge->bi; 407455d1abb9SHong Zhang bj = merge->bj; 407555d1abb9SHong Zhang buf_ri = merge->buf_ri; 407655d1abb9SHong Zhang buf_rj = merge->buf_rj; 407755d1abb9SHong Zhang 407855d1abb9SHong Zhang ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4079357abbc8SBarry Smith owners = merge->rowmap.range; 408055d1abb9SHong Zhang len_s = merge->len_s; 408155d1abb9SHong Zhang 408255d1abb9SHong Zhang /* send and recv matrix values */ 408355d1abb9SHong Zhang /*-----------------------------*/ 4084357abbc8SBarry Smith ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 408555d1abb9SHong Zhang ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 408655d1abb9SHong Zhang 408755d1abb9SHong Zhang ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 408855d1abb9SHong Zhang for (proc=0,k=0; proc<size; proc++){ 408955d1abb9SHong Zhang if (!len_s[proc]) continue; 409055d1abb9SHong Zhang i = owners[proc]; 409155d1abb9SHong Zhang ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 409255d1abb9SHong Zhang k++; 409355d1abb9SHong Zhang } 409455d1abb9SHong Zhang 40950c468ba9SBarry Smith if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 40960c468ba9SBarry Smith if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 409755d1abb9SHong Zhang ierr = PetscFree(status);CHKERRQ(ierr); 409855d1abb9SHong Zhang 409955d1abb9SHong Zhang ierr = PetscFree(s_waits);CHKERRQ(ierr); 410055d1abb9SHong Zhang ierr = PetscFree(r_waits);CHKERRQ(ierr); 410155d1abb9SHong Zhang 410255d1abb9SHong Zhang /* insert mat values of mpimat */ 410355d1abb9SHong Zhang /*----------------------------*/ 
4104a77337e4SBarry Smith ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4105b1d57f15SBarry Smith ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 410655d1abb9SHong Zhang nextrow = buf_ri_k + merge->nrecv; 410755d1abb9SHong Zhang nextai = nextrow + merge->nrecv; 410855d1abb9SHong Zhang 410955d1abb9SHong Zhang for (k=0; k<merge->nrecv; k++){ 411055d1abb9SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 411155d1abb9SHong Zhang nrows = *(buf_ri_k[k]); 411255d1abb9SHong Zhang nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 411355d1abb9SHong Zhang nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 411455d1abb9SHong Zhang } 411555d1abb9SHong Zhang 411655d1abb9SHong Zhang /* set values of ba */ 4117357abbc8SBarry Smith m = merge->rowmap.n; 411855d1abb9SHong Zhang for (i=0; i<m; i++) { 411955d1abb9SHong Zhang arow = owners[rank] + i; 412055d1abb9SHong Zhang bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 412155d1abb9SHong Zhang bnzi = bi[i+1] - bi[i]; 4122a77337e4SBarry Smith ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 412355d1abb9SHong Zhang 412455d1abb9SHong Zhang /* add local non-zero vals of this proc's seqmat into ba */ 412555d1abb9SHong Zhang anzi = ai[arow+1] - ai[arow]; 412655d1abb9SHong Zhang aj = a->j + ai[arow]; 412755d1abb9SHong Zhang aa = a->a + ai[arow]; 412855d1abb9SHong Zhang nextaj = 0; 412955d1abb9SHong Zhang for (j=0; nextaj<anzi; j++){ 413055d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 413155d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 413255d1abb9SHong Zhang } 413355d1abb9SHong Zhang } 413455d1abb9SHong Zhang 413555d1abb9SHong Zhang /* add received vals into ba */ 413655d1abb9SHong Zhang for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 413755d1abb9SHong Zhang /* i-th row */ 413855d1abb9SHong Zhang if (i == *nextrow[k]) { 
413955d1abb9SHong Zhang anzi = *(nextai[k]+1) - *nextai[k]; 414055d1abb9SHong Zhang aj = buf_rj[k] + *(nextai[k]); 414155d1abb9SHong Zhang aa = abuf_r[k] + *(nextai[k]); 414255d1abb9SHong Zhang nextaj = 0; 414355d1abb9SHong Zhang for (j=0; nextaj<anzi; j++){ 414455d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 414555d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 414655d1abb9SHong Zhang } 414755d1abb9SHong Zhang } 414855d1abb9SHong Zhang nextrow[k]++; nextai[k]++; 414955d1abb9SHong Zhang } 415055d1abb9SHong Zhang } 415155d1abb9SHong Zhang ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 415255d1abb9SHong Zhang } 415355d1abb9SHong Zhang ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 415455d1abb9SHong Zhang ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 415555d1abb9SHong Zhang 415655d1abb9SHong Zhang ierr = PetscFree(abuf_r);CHKERRQ(ierr); 415755d1abb9SHong Zhang ierr = PetscFree(ba_i);CHKERRQ(ierr); 415855d1abb9SHong Zhang ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 41594ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 416055d1abb9SHong Zhang PetscFunctionReturn(0); 416155d1abb9SHong Zhang } 416238f152feSBarry Smith 416338f152feSBarry Smith #undef __FUNCT__ 416438f152feSBarry Smith #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4165be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4166e5f2cdd8SHong Zhang { 4167f08fae4eSHong Zhang PetscErrorCode ierr; 416855a3bba9SHong Zhang Mat B_mpi; 4169c2234fe3SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4170b1d57f15SBarry Smith PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4171b1d57f15SBarry Smith PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4172d0f46423SBarry Smith PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4173b1d57f15SBarry Smith PetscInt len,proc,*dnz,*onz; 
4174b1d57f15SBarry Smith PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4175b1d57f15SBarry Smith PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 417655d1abb9SHong Zhang MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 417758cb9c82SHong Zhang MPI_Status *status; 4178a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4179be0fcf8dSHong Zhang PetscBT lnkbt; 418051a7d1a8SHong Zhang Mat_Merge_SeqsToMPI *merge; 4181776b82aeSLisandro Dalcin PetscContainer container; 418202c68681SHong Zhang 4183e5f2cdd8SHong Zhang PetscFunctionBegin; 41844ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 41853c2c1871SHong Zhang 418638f152feSBarry Smith /* make sure it is a PETSc comm */ 418738f152feSBarry Smith ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4188e5f2cdd8SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4189e5f2cdd8SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 419055d1abb9SHong Zhang 419151a7d1a8SHong Zhang ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4192c2234fe3SHong Zhang ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4193e5f2cdd8SHong Zhang 41946abd8857SHong Zhang /* determine row ownership */ 4195f08fae4eSHong Zhang /*---------------------------------------------------------*/ 4196b167c4dbSHong Zhang ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4197899cda47SBarry Smith merge->rowmap.n = m; 4198899cda47SBarry Smith merge->rowmap.N = M; 4199fc42d0c8SSatish Balay merge->rowmap.bs = 1; 42006148ca0dSBarry Smith ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4201b1d57f15SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4202b1d57f15SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 420355d1abb9SHong Zhang 4204357abbc8SBarry Smith m = merge->rowmap.n; 4205357abbc8SBarry Smith M = merge->rowmap.N; 
4206357abbc8SBarry Smith owners = merge->rowmap.range; 42076abd8857SHong Zhang 42086abd8857SHong Zhang /* determine the number of messages to send, their lengths */ 42096abd8857SHong Zhang /*---------------------------------------------------------*/ 42103e06a4e6SHong Zhang len_s = merge->len_s; 421151a7d1a8SHong Zhang 42122257cef7SHong Zhang len = 0; /* length of buf_si[] */ 4213c2234fe3SHong Zhang merge->nsend = 0; 4214409913e3SHong Zhang for (proc=0; proc<size; proc++){ 42152257cef7SHong Zhang len_si[proc] = 0; 42163e06a4e6SHong Zhang if (proc == rank){ 42176abd8857SHong Zhang len_s[proc] = 0; 42183e06a4e6SHong Zhang } else { 421902c68681SHong Zhang len_si[proc] = owners[proc+1] - owners[proc] + 1; 42203e06a4e6SHong Zhang len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 42213e06a4e6SHong Zhang } 42223e06a4e6SHong Zhang if (len_s[proc]) { 4223c2234fe3SHong Zhang merge->nsend++; 42242257cef7SHong Zhang nrows = 0; 42252257cef7SHong Zhang for (i=owners[proc]; i<owners[proc+1]; i++){ 42262257cef7SHong Zhang if (ai[i+1] > ai[i]) nrows++; 42272257cef7SHong Zhang } 42282257cef7SHong Zhang len_si[proc] = 2*(nrows+1); 42292257cef7SHong Zhang len += len_si[proc]; 4230409913e3SHong Zhang } 423158cb9c82SHong Zhang } 4232409913e3SHong Zhang 42332257cef7SHong Zhang /* determine the number and length of messages to receive for ij-structure */ 42342257cef7SHong Zhang /*-------------------------------------------------------------------------*/ 423551a7d1a8SHong Zhang ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 423655d1abb9SHong Zhang ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4237671beff6SHong Zhang 42383e06a4e6SHong Zhang /* post the Irecv of j-structure */ 42393e06a4e6SHong Zhang /*-------------------------------*/ 42402c72b5baSSatish Balay ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 
42413e06a4e6SHong Zhang ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 424202c68681SHong Zhang 42433e06a4e6SHong Zhang /* post the Isend of j-structure */ 4244affca5deSHong Zhang /*--------------------------------*/ 42452257cef7SHong Zhang ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 424602c68681SHong Zhang sj_waits = si_waits + merge->nsend; 42473e06a4e6SHong Zhang 42482257cef7SHong Zhang for (proc=0, k=0; proc<size; proc++){ 4249409913e3SHong Zhang if (!len_s[proc]) continue; 425002c68681SHong Zhang i = owners[proc]; 4251b1d57f15SBarry Smith ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 425251a7d1a8SHong Zhang k++; 425351a7d1a8SHong Zhang } 425451a7d1a8SHong Zhang 42553e06a4e6SHong Zhang /* receives and sends of j-structure are complete */ 42563e06a4e6SHong Zhang /*------------------------------------------------*/ 42570c468ba9SBarry Smith if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 42580c468ba9SBarry Smith if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 425902c68681SHong Zhang 426002c68681SHong Zhang /* send and recv i-structure */ 426102c68681SHong Zhang /*---------------------------*/ 42622c72b5baSSatish Balay ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 426302c68681SHong Zhang ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 426402c68681SHong Zhang 4265b1d57f15SBarry Smith ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 42663e06a4e6SHong Zhang buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 42672257cef7SHong Zhang for (proc=0,k=0; proc<size; proc++){ 426802c68681SHong Zhang if (!len_s[proc]) continue; 42693e06a4e6SHong Zhang /* form outgoing message for i-structure: 42703e06a4e6SHong Zhang buf_si[0]: nrows to be sent 42713e06a4e6SHong Zhang 
[1:nrows]: row index (global) 42723e06a4e6SHong Zhang [nrows+1:2*nrows+1]: i-structure index 42733e06a4e6SHong Zhang */ 42743e06a4e6SHong Zhang /*-------------------------------------------*/ 42752257cef7SHong Zhang nrows = len_si[proc]/2 - 1; 42763e06a4e6SHong Zhang buf_si_i = buf_si + nrows+1; 42773e06a4e6SHong Zhang buf_si[0] = nrows; 42783e06a4e6SHong Zhang buf_si_i[0] = 0; 42793e06a4e6SHong Zhang nrows = 0; 42803e06a4e6SHong Zhang for (i=owners[proc]; i<owners[proc+1]; i++){ 42813e06a4e6SHong Zhang anzi = ai[i+1] - ai[i]; 42823e06a4e6SHong Zhang if (anzi) { 42833e06a4e6SHong Zhang buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 42843e06a4e6SHong Zhang buf_si[nrows+1] = i-owners[proc]; /* local row index */ 42853e06a4e6SHong Zhang nrows++; 42863e06a4e6SHong Zhang } 42873e06a4e6SHong Zhang } 4288b1d57f15SBarry Smith ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 428902c68681SHong Zhang k++; 42902257cef7SHong Zhang buf_si += len_si[proc]; 429102c68681SHong Zhang } 42922257cef7SHong Zhang 42930c468ba9SBarry Smith if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 42940c468ba9SBarry Smith if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 429502c68681SHong Zhang 4296ae15b995SBarry Smith ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 42973e06a4e6SHong Zhang for (i=0; i<merge->nrecv; i++){ 4298ae15b995SBarry Smith ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 42993e06a4e6SHong Zhang } 43003e06a4e6SHong Zhang 43013e06a4e6SHong Zhang ierr = PetscFree(len_si);CHKERRQ(ierr); 430202c68681SHong Zhang ierr = PetscFree(len_ri);CHKERRQ(ierr); 430302c68681SHong Zhang ierr = PetscFree(rj_waits);CHKERRQ(ierr); 43043e06a4e6SHong Zhang ierr = PetscFree(si_waits);CHKERRQ(ierr); 43052257cef7SHong Zhang ierr = PetscFree(ri_waits);CHKERRQ(ierr); 
43063e06a4e6SHong Zhang ierr = PetscFree(buf_s);CHKERRQ(ierr); 4307bcc1bcd5SHong Zhang ierr = PetscFree(status);CHKERRQ(ierr); 430858cb9c82SHong Zhang 4309bcc1bcd5SHong Zhang /* compute a local seq matrix in each processor */ 4310bcc1bcd5SHong Zhang /*----------------------------------------------*/ 431158cb9c82SHong Zhang /* allocate bi array and free space for accumulating nonzero column info */ 4312b1d57f15SBarry Smith ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 431358cb9c82SHong Zhang bi[0] = 0; 431458cb9c82SHong Zhang 4315be0fcf8dSHong Zhang /* create and initialize a linked list */ 4316be0fcf8dSHong Zhang nlnk = N+1; 4317be0fcf8dSHong Zhang ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 431858cb9c82SHong Zhang 4319bcc1bcd5SHong Zhang /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 432058cb9c82SHong Zhang len = 0; 4321bcc1bcd5SHong Zhang len = ai[owners[rank+1]] - ai[owners[rank]]; 4322a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 432358cb9c82SHong Zhang current_space = free_space; 432458cb9c82SHong Zhang 4325bcc1bcd5SHong Zhang /* determine symbolic info for each local row */ 4326b1d57f15SBarry Smith ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 43273e06a4e6SHong Zhang nextrow = buf_ri_k + merge->nrecv; 43283e06a4e6SHong Zhang nextai = nextrow + merge->nrecv; 43293e06a4e6SHong Zhang for (k=0; k<merge->nrecv; k++){ 43302257cef7SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 43313e06a4e6SHong Zhang nrows = *buf_ri_k[k]; 43323e06a4e6SHong Zhang nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 43332257cef7SHong Zhang nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 43343e06a4e6SHong Zhang } 43352257cef7SHong Zhang 4336bcc1bcd5SHong Zhang ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4337bcc1bcd5SHong Zhang len 
= 0; 433858cb9c82SHong Zhang for (i=0;i<m;i++) { 433958cb9c82SHong Zhang bnzi = 0; 434058cb9c82SHong Zhang /* add local non-zero cols of this proc's seqmat into lnk */ 434158cb9c82SHong Zhang arow = owners[rank] + i; 434258cb9c82SHong Zhang anzi = ai[arow+1] - ai[arow]; 434358cb9c82SHong Zhang aj = a->j + ai[arow]; 4344be0fcf8dSHong Zhang ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 434558cb9c82SHong Zhang bnzi += nlnk; 434658cb9c82SHong Zhang /* add received col data into lnk */ 434751a7d1a8SHong Zhang for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 434855d1abb9SHong Zhang if (i == *nextrow[k]) { /* i-th row */ 43493e06a4e6SHong Zhang anzi = *(nextai[k]+1) - *nextai[k]; 43503e06a4e6SHong Zhang aj = buf_rj[k] + *nextai[k]; 43513e06a4e6SHong Zhang ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 43523e06a4e6SHong Zhang bnzi += nlnk; 43533e06a4e6SHong Zhang nextrow[k]++; nextai[k]++; 43543e06a4e6SHong Zhang } 435558cb9c82SHong Zhang } 4356bcc1bcd5SHong Zhang if (len < bnzi) len = bnzi; /* =max(bnzi) */ 435758cb9c82SHong Zhang 435858cb9c82SHong Zhang /* if free space is not available, make more free space */ 435958cb9c82SHong Zhang if (current_space->local_remaining<bnzi) { 43604238b7adSHong Zhang ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 436158cb9c82SHong Zhang nspacedouble++; 436258cb9c82SHong Zhang } 436358cb9c82SHong Zhang /* copy data into free space, then initialize lnk */ 4364be0fcf8dSHong Zhang ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4365bcc1bcd5SHong Zhang ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4366bcc1bcd5SHong Zhang 436758cb9c82SHong Zhang current_space->array += bnzi; 436858cb9c82SHong Zhang current_space->local_used += bnzi; 436958cb9c82SHong Zhang current_space->local_remaining -= bnzi; 437058cb9c82SHong Zhang 437158cb9c82SHong Zhang bi[i+1] = bi[i] + bnzi; 437258cb9c82SHong Zhang } 
4373bcc1bcd5SHong Zhang 4374bcc1bcd5SHong Zhang ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4375bcc1bcd5SHong Zhang 4376b1d57f15SBarry Smith ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4377a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4378be0fcf8dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4379409913e3SHong Zhang 4380bcc1bcd5SHong Zhang /* create symbolic parallel matrix B_mpi */ 4381bcc1bcd5SHong Zhang /*---------------------------------------*/ 4382f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 438354b84b50SHong Zhang if (n==PETSC_DECIDE) { 4384f69a0ea3SMatthew Knepley ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 438554b84b50SHong Zhang } else { 4386f69a0ea3SMatthew Knepley ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 438754b84b50SHong Zhang } 4388bcc1bcd5SHong Zhang ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4389bcc1bcd5SHong Zhang ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4390bcc1bcd5SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 439158cb9c82SHong Zhang 43926abd8857SHong Zhang /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 43936abd8857SHong Zhang B_mpi->assembled = PETSC_FALSE; 4394affca5deSHong Zhang B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4395affca5deSHong Zhang merge->bi = bi; 4396affca5deSHong Zhang merge->bj = bj; 439702c68681SHong Zhang merge->buf_ri = buf_ri; 439802c68681SHong Zhang merge->buf_rj = buf_rj; 4399de0260b3SHong Zhang merge->coi = PETSC_NULL; 4400de0260b3SHong Zhang merge->coj = PETSC_NULL; 4401de0260b3SHong Zhang merge->owners_co = PETSC_NULL; 4402affca5deSHong Zhang 4403affca5deSHong Zhang /* attach the supporting struct to B_mpi for reuse */ 4404776b82aeSLisandro Dalcin ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4405776b82aeSLisandro Dalcin ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4406affca5deSHong Zhang ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4407affca5deSHong Zhang *mpimat = B_mpi; 440838f152feSBarry Smith 440938f152feSBarry Smith ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 44104ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4411e5f2cdd8SHong Zhang PetscFunctionReturn(0); 4412e5f2cdd8SHong Zhang } 441325616d81SHong Zhang 441438f152feSBarry Smith #undef __FUNCT__ 441538f152feSBarry Smith #define __FUNCT__ "MatMerge_SeqsToMPI" 4416be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 441755d1abb9SHong Zhang { 441855d1abb9SHong Zhang PetscErrorCode ierr; 441955d1abb9SHong Zhang 442055d1abb9SHong Zhang PetscFunctionBegin; 44214ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 442255d1abb9SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 442355d1abb9SHong Zhang ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 442455d1abb9SHong Zhang } 442555d1abb9SHong Zhang ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 44264ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 442755d1abb9SHong Zhang PetscFunctionReturn(0); 442855d1abb9SHong Zhang } 44294ebed01fSBarry Smith 443025616d81SHong Zhang #undef __FUNCT__ 443125616d81SHong Zhang #define __FUNCT__ "MatGetLocalMat" 4432bc08b0f1SBarry Smith /*@ 443332fba14fSHong Zhang MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 443425616d81SHong Zhang 443532fba14fSHong Zhang Not Collective 443625616d81SHong Zhang 443725616d81SHong Zhang Input Parameters: 443825616d81SHong Zhang + A - the matrix 443925616d81SHong Zhang . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 444025616d81SHong Zhang 444125616d81SHong Zhang Output Parameter: 444225616d81SHong Zhang . A_loc - the local sequential matrix generated 444325616d81SHong Zhang 444425616d81SHong Zhang Level: developer 444525616d81SHong Zhang 444625616d81SHong Zhang @*/ 4447be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 444825616d81SHong Zhang { 444925616d81SHong Zhang PetscErrorCode ierr; 445001b7ae99SHong Zhang Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 445101b7ae99SHong Zhang Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 445201b7ae99SHong Zhang PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4453a77337e4SBarry Smith MatScalar *aa=a->a,*ba=b->a,*cam; 4454a77337e4SBarry Smith PetscScalar *ca; 4455d0f46423SBarry Smith PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 44565a7d977cSHong Zhang PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 445725616d81SHong Zhang 445825616d81SHong Zhang PetscFunctionBegin; 44594ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 446001b7ae99SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 4461dea91ad1SHong Zhang ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4462dea91ad1SHong Zhang ci[0] = 0; 446301b7ae99SHong Zhang for (i=0; i<am; i++){ 4464dea91ad1SHong Zhang ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 446501b7ae99SHong Zhang } 4466dea91ad1SHong Zhang ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4467dea91ad1SHong Zhang ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4468dea91ad1SHong Zhang k = 0; 446901b7ae99SHong Zhang for (i=0; i<am; i++) { 44705a7d977cSHong Zhang ncols_o = bi[i+1] - bi[i]; 44715a7d977cSHong Zhang ncols_d = ai[i+1] - ai[i]; 447201b7ae99SHong Zhang /* off-diagonal portion of A */ 44735a7d977cSHong Zhang for (jo=0; jo<ncols_o; jo++) { 44745a7d977cSHong Zhang col = 
cmap[*bj]; 44755a7d977cSHong Zhang if (col >= cstart) break; 44765a7d977cSHong Zhang cj[k] = col; bj++; 44775a7d977cSHong Zhang ca[k++] = *ba++; 44785a7d977cSHong Zhang } 44795a7d977cSHong Zhang /* diagonal portion of A */ 44805a7d977cSHong Zhang for (j=0; j<ncols_d; j++) { 44815a7d977cSHong Zhang cj[k] = cstart + *aj++; 44825a7d977cSHong Zhang ca[k++] = *aa++; 44835a7d977cSHong Zhang } 44845a7d977cSHong Zhang /* off-diagonal portion of A */ 44855a7d977cSHong Zhang for (j=jo; j<ncols_o; j++) { 44865a7d977cSHong Zhang cj[k] = cmap[*bj++]; 44875a7d977cSHong Zhang ca[k++] = *ba++; 44885a7d977cSHong Zhang } 448925616d81SHong Zhang } 4490dea91ad1SHong Zhang /* put together the new matrix */ 4491d0f46423SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4492dea91ad1SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4493dea91ad1SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4494dea91ad1SHong Zhang mat = (Mat_SeqAIJ*)(*A_loc)->data; 4495e6b907acSBarry Smith mat->free_a = PETSC_TRUE; 4496e6b907acSBarry Smith mat->free_ij = PETSC_TRUE; 4497dea91ad1SHong Zhang mat->nonew = 0; 44985a7d977cSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 44995a7d977cSHong Zhang mat=(Mat_SeqAIJ*)(*A_loc)->data; 4500a77337e4SBarry Smith ci = mat->i; cj = mat->j; cam = mat->a; 45015a7d977cSHong Zhang for (i=0; i<am; i++) { 45025a7d977cSHong Zhang /* off-diagonal portion of A */ 45035a7d977cSHong Zhang ncols_o = bi[i+1] - bi[i]; 45045a7d977cSHong Zhang for (jo=0; jo<ncols_o; jo++) { 45055a7d977cSHong Zhang col = cmap[*bj]; 45065a7d977cSHong Zhang if (col >= cstart) break; 4507a77337e4SBarry Smith *cam++ = *ba++; bj++; 45085a7d977cSHong Zhang } 45095a7d977cSHong Zhang /* diagonal portion of A */ 4510ecc9b87dSHong Zhang ncols_d = ai[i+1] - ai[i]; 4511a77337e4SBarry Smith for (j=0; j<ncols_d; j++) *cam++ = *aa++; 45125a7d977cSHong Zhang /* off-diagonal portion of A */ 4513f33d1a9aSHong Zhang for (j=jo; j<ncols_o; j++) { 4514a77337e4SBarry Smith *cam++ = *ba++; bj++; 4515f33d1a9aSHong Zhang } 45165a7d977cSHong Zhang } 45175a7d977cSHong Zhang } else { 45185a7d977cSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 451925616d81SHong Zhang } 452001b7ae99SHong Zhang 45214ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 452225616d81SHong Zhang PetscFunctionReturn(0); 452325616d81SHong Zhang } 452425616d81SHong Zhang 452532fba14fSHong Zhang #undef __FUNCT__ 452632fba14fSHong Zhang #define __FUNCT__ "MatGetLocalMatCondensed" 452732fba14fSHong Zhang /*@C 452832fba14fSHong Zhang MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 452932fba14fSHong Zhang 453032fba14fSHong Zhang Not Collective 453132fba14fSHong Zhang 453232fba14fSHong Zhang Input Parameters: 453332fba14fSHong Zhang + A - the matrix 453432fba14fSHong Zhang . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 453532fba14fSHong Zhang - row, col - index sets of rows and columns to extract (or PETSC_NULL) 453632fba14fSHong Zhang 453732fba14fSHong Zhang Output Parameter: 453832fba14fSHong Zhang . A_loc - the local sequential matrix generated 453932fba14fSHong Zhang 454032fba14fSHong Zhang Level: developer 454132fba14fSHong Zhang 454232fba14fSHong Zhang @*/ 4543be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 454432fba14fSHong Zhang { 454532fba14fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 454632fba14fSHong Zhang PetscErrorCode ierr; 454732fba14fSHong Zhang PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 454832fba14fSHong Zhang IS isrowa,iscola; 454932fba14fSHong Zhang Mat *aloc; 455032fba14fSHong Zhang 455132fba14fSHong Zhang PetscFunctionBegin; 45524ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 455332fba14fSHong Zhang if (!row){ 4554d0f46423SBarry Smith start = A->rmap->rstart; end = A->rmap->rend; 455532fba14fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 455632fba14fSHong Zhang } else { 455732fba14fSHong Zhang isrowa = *row; 455832fba14fSHong Zhang } 455932fba14fSHong Zhang if (!col){ 4560d0f46423SBarry Smith start = A->cmap->rstart; 456132fba14fSHong Zhang cmap = a->garray; 4562d0f46423SBarry Smith nzA = a->A->cmap->n; 4563d0f46423SBarry Smith nzB = a->B->cmap->n; 456432fba14fSHong Zhang ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 456532fba14fSHong Zhang ncols = 0; 456632fba14fSHong Zhang for (i=0; i<nzB; i++) { 456732fba14fSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 456832fba14fSHong Zhang else break; 456932fba14fSHong Zhang } 457032fba14fSHong Zhang imark = i; 457132fba14fSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 457232fba14fSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = 
cmap[i]; 457332fba14fSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 457432fba14fSHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 457532fba14fSHong Zhang } else { 457632fba14fSHong Zhang iscola = *col; 457732fba14fSHong Zhang } 457832fba14fSHong Zhang if (scall != MAT_INITIAL_MATRIX){ 457932fba14fSHong Zhang ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 458032fba14fSHong Zhang aloc[0] = *A_loc; 458132fba14fSHong Zhang } 458232fba14fSHong Zhang ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 458332fba14fSHong Zhang *A_loc = aloc[0]; 458432fba14fSHong Zhang ierr = PetscFree(aloc);CHKERRQ(ierr); 458532fba14fSHong Zhang if (!row){ 458632fba14fSHong Zhang ierr = ISDestroy(isrowa);CHKERRQ(ierr); 458732fba14fSHong Zhang } 458832fba14fSHong Zhang if (!col){ 458932fba14fSHong Zhang ierr = ISDestroy(iscola);CHKERRQ(ierr); 459032fba14fSHong Zhang } 45914ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 459232fba14fSHong Zhang PetscFunctionReturn(0); 459332fba14fSHong Zhang } 459432fba14fSHong Zhang 459525616d81SHong Zhang #undef __FUNCT__ 459625616d81SHong Zhang #define __FUNCT__ "MatGetBrowsOfAcols" 459725616d81SHong Zhang /*@C 459832fba14fSHong Zhang MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 459925616d81SHong Zhang 460025616d81SHong Zhang Collective on Mat 460125616d81SHong Zhang 460225616d81SHong Zhang Input Parameters: 4603e240928fSHong Zhang + A,B - the matrices in mpiaij format 460425616d81SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 460525616d81SHong Zhang - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 460625616d81SHong Zhang 460725616d81SHong Zhang Output Parameter: 460825616d81SHong Zhang + rowb, colb - index sets of rows and columns of B to extract 4609d0f46423SBarry Smith . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 461025616d81SHong Zhang - B_seq - the sequential matrix generated 461125616d81SHong Zhang 461225616d81SHong Zhang Level: developer 461325616d81SHong Zhang 461425616d81SHong Zhang @*/ 4615be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 461625616d81SHong Zhang { 4617899cda47SBarry Smith Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 461825616d81SHong Zhang PetscErrorCode ierr; 4619b1d57f15SBarry Smith PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 462025616d81SHong Zhang IS isrowb,iscolb; 462125616d81SHong Zhang Mat *bseq; 462225616d81SHong Zhang 462325616d81SHong Zhang PetscFunctionBegin; 4624d0f46423SBarry Smith if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4625d0f46423SBarry Smith SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 462625616d81SHong Zhang } 46274ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 462825616d81SHong Zhang 462925616d81SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 4630d0f46423SBarry Smith start = A->cmap->rstart; 463125616d81SHong Zhang cmap = a->garray; 4632d0f46423SBarry Smith nzA = a->A->cmap->n; 4633d0f46423SBarry Smith nzB = a->B->cmap->n; 4634b1d57f15SBarry Smith ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 463525616d81SHong Zhang ncols = 0; 46360390132cSHong Zhang for (i=0; i<nzB; i++) { /* row < local row index */ 463725616d81SHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 463825616d81SHong Zhang else break; 463925616d81SHong Zhang } 464025616d81SHong Zhang imark = i; 46410390132cSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 46420390132cSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row 
index */ 464325616d81SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 464425616d81SHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 464525616d81SHong Zhang *brstart = imark; 4646d0f46423SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 464725616d81SHong Zhang } else { 464825616d81SHong Zhang if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 464925616d81SHong Zhang isrowb = *rowb; iscolb = *colb; 465025616d81SHong Zhang ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 465125616d81SHong Zhang bseq[0] = *B_seq; 465225616d81SHong Zhang } 465325616d81SHong Zhang ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 465425616d81SHong Zhang *B_seq = bseq[0]; 465525616d81SHong Zhang ierr = PetscFree(bseq);CHKERRQ(ierr); 465625616d81SHong Zhang if (!rowb){ 465725616d81SHong Zhang ierr = ISDestroy(isrowb);CHKERRQ(ierr); 465825616d81SHong Zhang } else { 465925616d81SHong Zhang *rowb = isrowb; 466025616d81SHong Zhang } 466125616d81SHong Zhang if (!colb){ 466225616d81SHong Zhang ierr = ISDestroy(iscolb);CHKERRQ(ierr); 466325616d81SHong Zhang } else { 466425616d81SHong Zhang *colb = iscolb; 466525616d81SHong Zhang } 46664ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 466725616d81SHong Zhang PetscFunctionReturn(0); 466825616d81SHong Zhang } 4669429d309bSHong Zhang 4670a61c8c0fSHong Zhang #undef __FUNCT__ 4671a61c8c0fSHong Zhang #define __FUNCT__ "MatGetBrowsOfAoCols" 4672429d309bSHong Zhang /*@C 4673429d309bSHong Zhang MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 467401b7ae99SHong Zhang of the OFF-DIAGONAL portion of local A 4675429d309bSHong Zhang 4676429d309bSHong Zhang Collective on Mat 4677429d309bSHong Zhang 4678429d309bSHong Zhang Input Parameters: 4679429d309bSHong Zhang + A,B - the matrices in mpiaij format 468087025532SHong Zhang . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 468187025532SHong Zhang . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 468287025532SHong Zhang - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4683429d309bSHong Zhang 4684429d309bSHong Zhang Output Parameter: 468587025532SHong Zhang + B_oth - the sequential matrix generated 4686429d309bSHong Zhang 4687429d309bSHong Zhang Level: developer 4688429d309bSHong Zhang 4689429d309bSHong Zhang @*/ 4690dd6ea824SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4691429d309bSHong Zhang { 4692a6b2eed2SHong Zhang VecScatter_MPI_General *gen_to,*gen_from; 4693429d309bSHong Zhang PetscErrorCode ierr; 4694899cda47SBarry Smith Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 469587025532SHong Zhang Mat_SeqAIJ *b_oth; 4696a6b2eed2SHong Zhang VecScatter ctx=a->Mvctx; 46977adad957SLisandro Dalcin MPI_Comm comm=((PetscObject)ctx)->comm; 46987adad957SLisandro Dalcin PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4699d0f46423SBarry Smith PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4700dd6ea824SBarry Smith PetscScalar *rvalues,*svalues; 4701dd6ea824SBarry Smith MatScalar *b_otha,*bufa,*bufA; 4702e42f35eeSHong Zhang PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4703910ba992SMatthew Knepley MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 470487025532SHong Zhang MPI_Status *sstatus,rstatus; 4705aa5bb8c0SSatish Balay PetscMPIInt jj; 4706e42f35eeSHong Zhang PetscInt *cols,sbs,rbs; 4707ba8c8a56SBarry Smith PetscScalar *vals; 4708429d309bSHong Zhang 4709429d309bSHong Zhang PetscFunctionBegin; 4710d0f46423SBarry Smith if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4711d0f46423SBarry Smith SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are 
incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4712429d309bSHong Zhang } 47134ebed01fSBarry Smith ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4714a6b2eed2SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4715a6b2eed2SHong Zhang 4716a6b2eed2SHong Zhang gen_to = (VecScatter_MPI_General*)ctx->todata; 4717a6b2eed2SHong Zhang gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4718e42f35eeSHong Zhang rvalues = gen_from->values; /* holds the length of receiving row */ 4719e42f35eeSHong Zhang svalues = gen_to->values; /* holds the length of sending row */ 4720a6b2eed2SHong Zhang nrecvs = gen_from->n; 4721a6b2eed2SHong Zhang nsends = gen_to->n; 4722d7ee0231SBarry Smith 4723d7ee0231SBarry Smith ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4724a6b2eed2SHong Zhang srow = gen_to->indices; /* local row index to be sent */ 4725a6b2eed2SHong Zhang sstarts = gen_to->starts; 4726a6b2eed2SHong Zhang sprocs = gen_to->procs; 4727a6b2eed2SHong Zhang sstatus = gen_to->sstatus; 4728e42f35eeSHong Zhang sbs = gen_to->bs; 4729e42f35eeSHong Zhang rstarts = gen_from->starts; 4730e42f35eeSHong Zhang rprocs = gen_from->procs; 4731e42f35eeSHong Zhang rbs = gen_from->bs; 4732429d309bSHong Zhang 4733dea91ad1SHong Zhang if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4734429d309bSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 4735a6b2eed2SHong Zhang /* i-array */ 4736a6b2eed2SHong Zhang /*---------*/ 4737a6b2eed2SHong Zhang /* post receives */ 4738a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++){ 4739e42f35eeSHong Zhang rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4740e42f35eeSHong Zhang nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 474187025532SHong Zhang ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4742429d309bSHong Zhang } 4743a6b2eed2SHong Zhang 4744a6b2eed2SHong Zhang /* pack the outgoing message 
*/ 474587025532SHong Zhang ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4746a6b2eed2SHong Zhang rstartsj = sstartsj + nsends +1; 4747a6b2eed2SHong Zhang sstartsj[0] = 0; rstartsj[0] = 0; 4748a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be sent */ 4749a6b2eed2SHong Zhang k = 0; 4750a6b2eed2SHong Zhang for (i=0; i<nsends; i++){ 4751e42f35eeSHong Zhang rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4752e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 475387025532SHong Zhang for (j=0; j<nrows; j++) { 4754d0f46423SBarry Smith row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4755e42f35eeSHong Zhang for (l=0; l<sbs; l++){ 4756e42f35eeSHong Zhang ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4757e42f35eeSHong Zhang rowlen[j*sbs+l] = ncols; 4758e42f35eeSHong Zhang len += ncols; 4759e42f35eeSHong Zhang ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4760e42f35eeSHong Zhang } 4761a6b2eed2SHong Zhang k++; 4762429d309bSHong Zhang } 4763e42f35eeSHong Zhang ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4764dea91ad1SHong Zhang sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4765429d309bSHong Zhang } 476687025532SHong Zhang /* recvs and sends of i-array are completed */ 476787025532SHong Zhang i = nrecvs; 476887025532SHong Zhang while (i--) { 4769aa5bb8c0SSatish Balay ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 477087025532SHong Zhang } 47710c468ba9SBarry Smith if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4772e42f35eeSHong Zhang 4773a6b2eed2SHong Zhang /* allocate buffers for sending j and a arrays */ 4774a6b2eed2SHong Zhang ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4775a6b2eed2SHong Zhang ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 
4776a6b2eed2SHong Zhang 477787025532SHong Zhang /* create i-array of B_oth */ 477887025532SHong Zhang ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 477987025532SHong Zhang b_othi[0] = 0; 4780a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be received */ 4781a6b2eed2SHong Zhang k = 0; 4782a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++){ 4783fd0ff01cSHong Zhang rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4784e42f35eeSHong Zhang nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 478587025532SHong Zhang for (j=0; j<nrows; j++) { 478687025532SHong Zhang b_othi[k+1] = b_othi[k] + rowlen[j]; 4787a6b2eed2SHong Zhang len += rowlen[j]; k++; 4788a6b2eed2SHong Zhang } 4789dea91ad1SHong Zhang rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4790a6b2eed2SHong Zhang } 4791a6b2eed2SHong Zhang 479287025532SHong Zhang /* allocate space for j and a arrrays of B_oth */ 479387025532SHong Zhang ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4794dd6ea824SBarry Smith ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4795a6b2eed2SHong Zhang 479687025532SHong Zhang /* j-array */ 479787025532SHong Zhang /*---------*/ 4798a6b2eed2SHong Zhang /* post receives of j-array */ 4799a6b2eed2SHong Zhang for (i=0; i<nrecvs; i++){ 480087025532SHong Zhang nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 480187025532SHong Zhang ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4802a6b2eed2SHong Zhang } 4803e42f35eeSHong Zhang 4804e42f35eeSHong Zhang /* pack the outgoing message j-array */ 4805a6b2eed2SHong Zhang k = 0; 4806a6b2eed2SHong Zhang for (i=0; i<nsends; i++){ 4807e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4808a6b2eed2SHong Zhang bufJ = bufj+sstartsj[i]; 480987025532SHong Zhang for (j=0; j<nrows; j++) { 4810d0f46423SBarry Smith row = srow[k++] + 
B->rmap->range[rank]; /* global row idx */ 4811e42f35eeSHong Zhang for (ll=0; ll<sbs; ll++){ 4812e42f35eeSHong Zhang ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4813a6b2eed2SHong Zhang for (l=0; l<ncols; l++){ 4814a6b2eed2SHong Zhang *bufJ++ = cols[l]; 481587025532SHong Zhang } 4816e42f35eeSHong Zhang ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4817e42f35eeSHong Zhang } 481887025532SHong Zhang } 481987025532SHong Zhang ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 482087025532SHong Zhang } 482187025532SHong Zhang 482287025532SHong Zhang /* recvs and sends of j-array are completed */ 482387025532SHong Zhang i = nrecvs; 482487025532SHong Zhang while (i--) { 4825aa5bb8c0SSatish Balay ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 482687025532SHong Zhang } 48270c468ba9SBarry Smith if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 482887025532SHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 482987025532SHong Zhang sstartsj = *startsj; 483087025532SHong Zhang rstartsj = sstartsj + nsends +1; 483187025532SHong Zhang bufa = *bufa_ptr; 483287025532SHong Zhang b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 483387025532SHong Zhang b_otha = b_oth->a; 483487025532SHong Zhang } else { 483587025532SHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 483687025532SHong Zhang } 483787025532SHong Zhang 483887025532SHong Zhang /* a-array */ 483987025532SHong Zhang /*---------*/ 484087025532SHong Zhang /* post receives of a-array */ 484187025532SHong Zhang for (i=0; i<nrecvs; i++){ 484287025532SHong Zhang nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 484387025532SHong Zhang ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 484487025532SHong Zhang } 4845e42f35eeSHong Zhang 4846e42f35eeSHong Zhang /* pack the outgoing 
message a-array */ 484787025532SHong Zhang k = 0; 484887025532SHong Zhang for (i=0; i<nsends; i++){ 4849e42f35eeSHong Zhang nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 485087025532SHong Zhang bufA = bufa+sstartsj[i]; 485187025532SHong Zhang for (j=0; j<nrows; j++) { 4852d0f46423SBarry Smith row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4853e42f35eeSHong Zhang for (ll=0; ll<sbs; ll++){ 4854e42f35eeSHong Zhang ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 485587025532SHong Zhang for (l=0; l<ncols; l++){ 4856a6b2eed2SHong Zhang *bufA++ = vals[l]; 4857a6b2eed2SHong Zhang } 4858e42f35eeSHong Zhang ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4859e42f35eeSHong Zhang } 4860a6b2eed2SHong Zhang } 486187025532SHong Zhang ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4862a6b2eed2SHong Zhang } 486387025532SHong Zhang /* recvs and sends of a-array are completed */ 486487025532SHong Zhang i = nrecvs; 486587025532SHong Zhang while (i--) { 4866aa5bb8c0SSatish Balay ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 486787025532SHong Zhang } 48680c468ba9SBarry Smith if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4869d7ee0231SBarry Smith ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4870a6b2eed2SHong Zhang 487187025532SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 4872a6b2eed2SHong Zhang /* put together the new matrix */ 4873d0f46423SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4874a6b2eed2SHong Zhang 4875a6b2eed2SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4876a6b2eed2SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 487787025532SHong Zhang b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4878e6b907acSBarry Smith b_oth->free_a = PETSC_TRUE; 4879e6b907acSBarry Smith b_oth->free_ij = PETSC_TRUE; 488087025532SHong Zhang b_oth->nonew = 0; 4881a6b2eed2SHong Zhang 4882a6b2eed2SHong Zhang ierr = PetscFree(bufj);CHKERRQ(ierr); 4883dea91ad1SHong Zhang if (!startsj || !bufa_ptr){ 4884dea91ad1SHong Zhang ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4885dea91ad1SHong Zhang ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4886dea91ad1SHong Zhang } else { 488787025532SHong Zhang *startsj = sstartsj; 488887025532SHong Zhang *bufa_ptr = bufa; 488987025532SHong Zhang } 4890dea91ad1SHong Zhang } 48914ebed01fSBarry Smith ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4892429d309bSHong Zhang PetscFunctionReturn(0); 4893429d309bSHong Zhang } 4894ccd8e176SBarry Smith 489543eb5e2fSMatthew Knepley #undef __FUNCT__ 489643eb5e2fSMatthew Knepley #define __FUNCT__ "MatGetCommunicationStructs" 489743eb5e2fSMatthew Knepley /*@C 489843eb5e2fSMatthew Knepley MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 489943eb5e2fSMatthew Knepley 490043eb5e2fSMatthew Knepley Not Collective 490143eb5e2fSMatthew Knepley 490243eb5e2fSMatthew Knepley Input Parameters: 490343eb5e2fSMatthew Knepley . A - The matrix in mpiaij format 490443eb5e2fSMatthew Knepley 490543eb5e2fSMatthew Knepley Output Parameter: 490643eb5e2fSMatthew Knepley + lvec - The local vector holding off-process values from the argument to a matrix-vector product 490743eb5e2fSMatthew Knepley . 
colmap - A map from global column index to local index into lvec 490843eb5e2fSMatthew Knepley - multScatter - A scatter from the argument of a matrix-vector product to lvec 490943eb5e2fSMatthew Knepley 491043eb5e2fSMatthew Knepley Level: developer 491143eb5e2fSMatthew Knepley 491243eb5e2fSMatthew Knepley @*/ 491343eb5e2fSMatthew Knepley #if defined (PETSC_USE_CTABLE) 491443eb5e2fSMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 491543eb5e2fSMatthew Knepley #else 491643eb5e2fSMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 491743eb5e2fSMatthew Knepley #endif 491843eb5e2fSMatthew Knepley { 491943eb5e2fSMatthew Knepley Mat_MPIAIJ *a; 492043eb5e2fSMatthew Knepley 492143eb5e2fSMatthew Knepley PetscFunctionBegin; 492243eb5e2fSMatthew Knepley PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 492343eb5e2fSMatthew Knepley PetscValidPointer(lvec, 2) 492443eb5e2fSMatthew Knepley PetscValidPointer(colmap, 3) 492543eb5e2fSMatthew Knepley PetscValidPointer(multScatter, 4) 492643eb5e2fSMatthew Knepley a = (Mat_MPIAIJ *) A->data; 492743eb5e2fSMatthew Knepley if (lvec) *lvec = a->lvec; 492843eb5e2fSMatthew Knepley if (colmap) *colmap = a->colmap; 492943eb5e2fSMatthew Knepley if (multScatter) *multScatter = a->Mvctx; 493043eb5e2fSMatthew Knepley PetscFunctionReturn(0); 493143eb5e2fSMatthew Knepley } 493243eb5e2fSMatthew Knepley 493317667f90SBarry Smith EXTERN_C_BEGIN 49348cf70c4bSSatish Balay extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 49358cf70c4bSSatish Balay extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 493617667f90SBarry Smith EXTERN_C_END 493717667f90SBarry Smith 49387c4f633dSBarry Smith #include "../src/mat/impls/dense/mpi/mpidense.h" 4939fc4dec0aSBarry Smith 4940fc4dec0aSBarry Smith #undef 
__FUNCT__ 4941fc4dec0aSBarry Smith #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4942fc4dec0aSBarry Smith /* 4943fc4dec0aSBarry Smith Computes (B'*A')' since computing B*A directly is untenable 4944fc4dec0aSBarry Smith 4945fc4dec0aSBarry Smith n p p 4946fc4dec0aSBarry Smith ( ) ( ) ( ) 4947fc4dec0aSBarry Smith m ( A ) * n ( B ) = m ( C ) 4948fc4dec0aSBarry Smith ( ) ( ) ( ) 4949fc4dec0aSBarry Smith 4950fc4dec0aSBarry Smith */ 4951fc4dec0aSBarry Smith PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4952fc4dec0aSBarry Smith { 4953fc4dec0aSBarry Smith PetscErrorCode ierr; 4954fc4dec0aSBarry Smith Mat At,Bt,Ct; 4955fc4dec0aSBarry Smith 4956fc4dec0aSBarry Smith PetscFunctionBegin; 4957fc4dec0aSBarry Smith ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4958fc4dec0aSBarry Smith ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4959fc4dec0aSBarry Smith ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4960fc4dec0aSBarry Smith ierr = MatDestroy(At);CHKERRQ(ierr); 4961fc4dec0aSBarry Smith ierr = MatDestroy(Bt);CHKERRQ(ierr); 4962fc4dec0aSBarry Smith ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4963e5e4356aSBarry Smith ierr = MatDestroy(Ct);CHKERRQ(ierr); 4964fc4dec0aSBarry Smith PetscFunctionReturn(0); 4965fc4dec0aSBarry Smith } 4966fc4dec0aSBarry Smith 4967fc4dec0aSBarry Smith #undef __FUNCT__ 4968fc4dec0aSBarry Smith #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4969fc4dec0aSBarry Smith PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4970fc4dec0aSBarry Smith { 4971fc4dec0aSBarry Smith PetscErrorCode ierr; 4972d0f46423SBarry Smith PetscInt m=A->rmap->n,n=B->cmap->n; 4973fc4dec0aSBarry Smith Mat Cmat; 4974fc4dec0aSBarry Smith 4975fc4dec0aSBarry Smith PetscFunctionBegin; 4976d0f46423SBarry Smith if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 497739804f7cSBarry Smith ierr = 
MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4978fc4dec0aSBarry Smith ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4979fc4dec0aSBarry Smith ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4980fc4dec0aSBarry Smith ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 498138556019SBarry Smith ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 498238556019SBarry Smith ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4983fc4dec0aSBarry Smith *C = Cmat; 4984fc4dec0aSBarry Smith PetscFunctionReturn(0); 4985fc4dec0aSBarry Smith } 4986fc4dec0aSBarry Smith 4987fc4dec0aSBarry Smith /* ----------------------------------------------------------------*/ 4988fc4dec0aSBarry Smith #undef __FUNCT__ 4989fc4dec0aSBarry Smith #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4990fc4dec0aSBarry Smith PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4991fc4dec0aSBarry Smith { 4992fc4dec0aSBarry Smith PetscErrorCode ierr; 4993fc4dec0aSBarry Smith 4994fc4dec0aSBarry Smith PetscFunctionBegin; 4995fc4dec0aSBarry Smith if (scall == MAT_INITIAL_MATRIX){ 4996fc4dec0aSBarry Smith ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4997fc4dec0aSBarry Smith } 4998fc4dec0aSBarry Smith ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4999fc4dec0aSBarry Smith PetscFunctionReturn(0); 5000fc4dec0aSBarry Smith } 5001fc4dec0aSBarry Smith 50025c9eb25fSBarry Smith EXTERN_C_BEGIN 5003611f576cSBarry Smith #if defined(PETSC_HAVE_MUMPS) 50045c9eb25fSBarry Smith extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5005611f576cSBarry Smith #endif 50063bf14a46SMatthew Knepley #if defined(PETSC_HAVE_PASTIX) 50073bf14a46SMatthew Knepley extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 50083bf14a46SMatthew Knepley #endif 5009611f576cSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST) 50105c9eb25fSBarry Smith extern 
PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor for MATMPIAIJ, invoked by MatSetType().

   Fills in the function table, initializes the Mat_MPIAIJ context to an
   empty (unpreallocated) state, and registers the composed-function hooks
   (external factorizations, preallocation, conversions, etc.).

   Note: each PetscObjectComposeFunctionDynamic() call pairs a lookup key
   string ("..._C") with the function of the matching name; the pairs must
   stay consistent or dynamic dispatch via PetscObjectQueryFunction() breaks.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct solvers, compiled in only when configured */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* type-specific operations reachable through PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  /* caller-supplied arrays fully define the local rows, so stashing of
     off-process entries is disabled and no preallocation pass is needed */
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the user arrays directly (no copy) as the diagonal (A) and
     off-diagonal (B) sequential blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
/* NOTE: redefinitions affect the remainder of this translation unit; the
   error macros now abort (CHKERRABORT) instead of returning an error code */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN 522981824310SBarry Smith #undef __FUNCT__ 523081824310SBarry Smith #define __FUNCT__ "matsetvaluesmpiaij_" 52311f6cc5b2SSatish Balay void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 523281824310SBarry Smith { 523381824310SBarry Smith Mat mat = *mmat; 523481824310SBarry Smith PetscInt m = *mm, n = *mn; 523581824310SBarry Smith InsertMode addv = *maddv; 523681824310SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 523781824310SBarry Smith PetscScalar value; 523881824310SBarry Smith PetscErrorCode ierr; 5239899cda47SBarry Smith 5240d9e2c085SLisandro Dalcin ierr = MatPreallocated(mat);CHKERRQ(ierr); 524181824310SBarry Smith if (mat->insertmode == NOT_SET_VALUES) { 524281824310SBarry Smith mat->insertmode = addv; 524381824310SBarry Smith } 524481824310SBarry Smith #if defined(PETSC_USE_DEBUG) 524581824310SBarry Smith else if (mat->insertmode != addv) { 524681824310SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 524781824310SBarry Smith } 524881824310SBarry Smith #endif 524981824310SBarry Smith { 5250d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5251d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 525281824310SBarry Smith PetscTruth roworiented = aij->roworiented; 525381824310SBarry Smith 525481824310SBarry Smith /* Some Variables required in the macro */ 525581824310SBarry Smith Mat A = aij->A; 525681824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 525781824310SBarry Smith PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5258dd6ea824SBarry Smith MatScalar *aa = a->a; 525981824310SBarry Smith PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 526081824310SBarry Smith Mat B = aij->B; 526181824310SBarry Smith Mat_SeqAIJ *b = 
(Mat_SeqAIJ*)B->data; 5262d0f46423SBarry Smith PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5263dd6ea824SBarry Smith MatScalar *ba = b->a; 526481824310SBarry Smith 526581824310SBarry Smith PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 526681824310SBarry Smith PetscInt nonew = a->nonew; 5267dd6ea824SBarry Smith MatScalar *ap1,*ap2; 526881824310SBarry Smith 526981824310SBarry Smith PetscFunctionBegin; 527081824310SBarry Smith for (i=0; i<m; i++) { 527181824310SBarry Smith if (im[i] < 0) continue; 527281824310SBarry Smith #if defined(PETSC_USE_DEBUG) 5273d0f46423SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 527481824310SBarry Smith #endif 527581824310SBarry Smith if (im[i] >= rstart && im[i] < rend) { 527681824310SBarry Smith row = im[i] - rstart; 527781824310SBarry Smith lastcol1 = -1; 527881824310SBarry Smith rp1 = aj + ai[row]; 527981824310SBarry Smith ap1 = aa + ai[row]; 528081824310SBarry Smith rmax1 = aimax[row]; 528181824310SBarry Smith nrow1 = ailen[row]; 528281824310SBarry Smith low1 = 0; 528381824310SBarry Smith high1 = nrow1; 528481824310SBarry Smith lastcol2 = -1; 528581824310SBarry Smith rp2 = bj + bi[row]; 528681824310SBarry Smith ap2 = ba + bi[row]; 528781824310SBarry Smith rmax2 = bimax[row]; 528881824310SBarry Smith nrow2 = bilen[row]; 528981824310SBarry Smith low2 = 0; 529081824310SBarry Smith high2 = nrow2; 529181824310SBarry Smith 529281824310SBarry Smith for (j=0; j<n; j++) { 529381824310SBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 529481824310SBarry Smith if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 529581824310SBarry Smith if (in[j] >= cstart && in[j] < cend){ 529681824310SBarry Smith col = in[j] - cstart; 529781824310SBarry Smith MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 529881824310SBarry 
Smith } else if (in[j] < 0) continue; 529981824310SBarry Smith #if defined(PETSC_USE_DEBUG) 5300d0f46423SBarry Smith else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 530181824310SBarry Smith #endif 530281824310SBarry Smith else { 530381824310SBarry Smith if (mat->was_assembled) { 530481824310SBarry Smith if (!aij->colmap) { 530581824310SBarry Smith ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 530681824310SBarry Smith } 530781824310SBarry Smith #if defined (PETSC_USE_CTABLE) 530881824310SBarry Smith ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 530981824310SBarry Smith col--; 531081824310SBarry Smith #else 531181824310SBarry Smith col = aij->colmap[in[j]] - 1; 531281824310SBarry Smith #endif 531381824310SBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 531481824310SBarry Smith ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 531581824310SBarry Smith col = in[j]; 531681824310SBarry Smith /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 531781824310SBarry Smith B = aij->B; 531881824310SBarry Smith b = (Mat_SeqAIJ*)B->data; 531981824310SBarry Smith bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 532081824310SBarry Smith rp2 = bj + bi[row]; 532181824310SBarry Smith ap2 = ba + bi[row]; 532281824310SBarry Smith rmax2 = bimax[row]; 532381824310SBarry Smith nrow2 = bilen[row]; 532481824310SBarry Smith low2 = 0; 532581824310SBarry Smith high2 = nrow2; 5326d0f46423SBarry Smith bm = aij->B->rmap->n; 532781824310SBarry Smith ba = b->a; 532881824310SBarry Smith } 532981824310SBarry Smith } else col = in[j]; 533081824310SBarry Smith MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 533181824310SBarry Smith } 533281824310SBarry Smith } 533381824310SBarry Smith } else { 533481824310SBarry Smith if (!aij->donotstash) { 533581824310SBarry Smith if (roworiented) { 53363b024144SHong Zhang ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 533781824310SBarry Smith } else { 53383b024144SHong Zhang ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 533981824310SBarry Smith } 534081824310SBarry Smith } 534181824310SBarry Smith } 534281824310SBarry Smith }} 534381824310SBarry Smith PetscFunctionReturnVoid(); 534481824310SBarry Smith } 534581824310SBarry Smith EXTERN_C_END 534603bfb495SBarry Smith 5347