1cb512458SBarry Smith #ifndef lint 2*227d817aSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.116 1996/01/24 16:01:05 balay Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66*227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 117b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118b49de8d1SLois Curfman McInnes } 119b49de8d1SLois Curfman McInnes } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes else { 122b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes return 0; 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1341eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1356abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1361eb62cbbSBarry Smith InsertMode addv; 1371eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 140d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 142bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1451eb62cbbSBarry Smith 1461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1490452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1501eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1511eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1531eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1541eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1558a729477SBarry Smith } 1568a729477SBarry Smith } 1578a729477SBarry Smith } 15817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1610452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16317699dbbSLois Curfman McInnes nreceives = work[rank]; 16417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16517699dbbSLois Curfman McInnes nmax = work[rank]; 1660452661fSBarry Smith PetscFree(work); 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith /* post receives: 1691eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1701eb62cbbSBarry Smith (global index,value) we store the global index as a double 171d6dfbf8fSBarry Smith to simplify the message passing. 1721eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1731eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1741eb62cbbSBarry Smith this is a lot of wasted space. 1751eb62cbbSBarry Smith 1761eb62cbbSBarry Smith 1771eb62cbbSBarry Smith This could be done better. 1781eb62cbbSBarry Smith */ 1790452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18078b31e54SBarry Smith CHKPTRQ(rvalues); 1810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18278b31e54SBarry Smith CHKPTRQ(recv_waits); 1831eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 184416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1851eb62cbbSBarry Smith comm,recv_waits+i); 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* do sends: 1891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1901eb62cbbSBarry Smith the ith processor 1911eb62cbbSBarry Smith */ 1920452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19478b31e54SBarry Smith CHKPTRQ(send_waits); 1950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1961eb62cbbSBarry Smith starts[0] = 0; 19717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2021eb62cbbSBarry Smith } 2030452661fSBarry Smith PetscFree(owner); 2041eb62cbbSBarry Smith starts[0] = 0; 20517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2061eb62cbbSBarry Smith count = 0; 20717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2081eb62cbbSBarry Smith if (procs[i]) { 209416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2101eb62cbbSBarry Smith comm,send_waits+count++); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith } 2130452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* Free cache space */ 21678b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2171eb62cbbSBarry Smith 2181eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2191eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2201eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2211eb62cbbSBarry Smith aij->rmax = nmax; 2221eb62cbbSBarry Smith 2231eb62cbbSBarry Smith return 0; 2241eb62cbbSBarry Smith } 22544a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2261eb62cbbSBarry Smith 227fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2281eb62cbbSBarry Smith { 22944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 230dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2311eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 232416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 233416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2341eb62cbbSBarry Smith Scalar *values,val; 2351eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2361eb62cbbSBarry Smith 2371eb62cbbSBarry Smith /* wait on receives */ 2381eb62cbbSBarry Smith while (count) { 239d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2401eb62cbbSBarry Smith /* unpack receives into our local space */ 241d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 242c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2431eb62cbbSBarry Smith n = n/3; 2441eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 245*227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 246*227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2471eb62cbbSBarry Smith val = values[3*i+2]; 2481eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2491eb62cbbSBarry Smith col -= aij->cstart; 2501eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2511eb62cbbSBarry Smith } 2521eb62cbbSBarry Smith else { 253*227d817aSBarry Smith if (mat->was_assembled) { 254b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 255dbb450caSBarry Smith col = aij->colmap[col] + shift; 256ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2572493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 258*227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 259d6dfbf8fSBarry Smith } 2609e25ed09SBarry Smith } 2611eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2621eb62cbbSBarry Smith } 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith count--; 2651eb62cbbSBarry Smith } 2660452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2671eb62cbbSBarry Smith 2681eb62cbbSBarry Smith /* wait on sends */ 2691eb62cbbSBarry Smith if (aij->nsends) { 2700452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27178b31e54SBarry Smith CHKPTRQ(send_status); 2721eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2730452661fSBarry Smith PetscFree(send_status); 2741eb62cbbSBarry Smith } 2750452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2761eb62cbbSBarry Smith 27764119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 27878b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 27978b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2801eb62cbbSBarry Smith 2812493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2822493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 283*227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 284*227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2852493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2862493cbb0SBarry Smith } 2872493cbb0SBarry Smith 288*227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 28978b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2905e42470aSBarry Smith } 29178b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29278b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2935e42470aSBarry Smith 2948a729477SBarry Smith return 0; 2958a729477SBarry Smith } 2968a729477SBarry Smith 2972ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2981eb62cbbSBarry Smith { 29944a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 300dbd7a890SLois Curfman McInnes int ierr; 30178b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30278b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3031eb62cbbSBarry Smith return 0; 3041eb62cbbSBarry Smith } 3051eb62cbbSBarry Smith 3061eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3071eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3081eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3091eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3101eb62cbbSBarry Smith routine. 3111eb62cbbSBarry Smith */ 31244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3131eb62cbbSBarry Smith { 31444a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 31517699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3166abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 31717699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3185392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 319d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 320d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3211eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3221eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3231eb62cbbSBarry Smith IS istmp; 3241eb62cbbSBarry Smith 32578b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 32678b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3271eb62cbbSBarry Smith 3281eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3290452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 330cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3310452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3321eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3331eb62cbbSBarry Smith idx = rows[i]; 3341eb62cbbSBarry Smith found = 0; 33517699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3361eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3371eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3381eb62cbbSBarry Smith } 3391eb62cbbSBarry Smith } 340bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3411eb62cbbSBarry Smith } 34217699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3431eb62cbbSBarry Smith 3441eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3450452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 34617699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34717699dbbSLois Curfman McInnes nrecvs = work[rank]; 34817699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 34917699dbbSLois Curfman McInnes nmax = work[rank]; 3500452661fSBarry Smith PetscFree(work); 3511eb62cbbSBarry Smith 3521eb62cbbSBarry Smith /* post receives: */ 3530452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 35478b31e54SBarry Smith CHKPTRQ(rvalues); 3550452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 35678b31e54SBarry Smith CHKPTRQ(recv_waits); 3571eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 358416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3591eb62cbbSBarry Smith } 3601eb62cbbSBarry Smith 3611eb62cbbSBarry Smith /* do sends: 3621eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3631eb62cbbSBarry Smith the ith processor 3641eb62cbbSBarry Smith */ 3650452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3660452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36778b31e54SBarry Smith CHKPTRQ(send_waits); 3680452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3691eb62cbbSBarry Smith starts[0] = 0; 37017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3711eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3721eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3731eb62cbbSBarry Smith } 3741eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3751eb62cbbSBarry Smith 3761eb62cbbSBarry Smith starts[0] = 0; 37717699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3781eb62cbbSBarry Smith count = 0; 37917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3801eb62cbbSBarry Smith if (procs[i]) { 381416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3821eb62cbbSBarry Smith } 3831eb62cbbSBarry Smith } 3840452661fSBarry Smith PetscFree(starts); 3851eb62cbbSBarry Smith 38617699dbbSLois Curfman McInnes base = owners[rank]; 3871eb62cbbSBarry Smith 3881eb62cbbSBarry Smith /* wait on receives */ 3890452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3901eb62cbbSBarry Smith source = lens + nrecvs; 3911eb62cbbSBarry Smith count = nrecvs; slen = 0; 3921eb62cbbSBarry Smith while (count) { 393d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3941eb62cbbSBarry Smith /* unpack receives into our local space */ 3951eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 396d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 397d6dfbf8fSBarry Smith lens[imdex] = n; 3981eb62cbbSBarry Smith slen += n; 3991eb62cbbSBarry Smith count--; 4001eb62cbbSBarry Smith } 4010452661fSBarry Smith PetscFree(recv_waits); 4021eb62cbbSBarry Smith 4031eb62cbbSBarry Smith /* move the data into the send scatter */ 4040452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4051eb62cbbSBarry Smith count = 0; 4061eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4071eb62cbbSBarry Smith values = rvalues + i*nmax; 4081eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4091eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4101eb62cbbSBarry Smith } 4111eb62cbbSBarry Smith } 4120452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4130452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4141eb62cbbSBarry Smith 4151eb62cbbSBarry Smith /* actually zap the local rows */ 416416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 417464493b3SBarry Smith PLogObjectParent(A,istmp); 4180452661fSBarry Smith PetscFree(lrows); 41978b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42078b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42178b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4221eb62cbbSBarry Smith 4231eb62cbbSBarry Smith /* wait on sends */ 4241eb62cbbSBarry Smith if (nsends) { 4250452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 42678b31e54SBarry Smith CHKPTRQ(send_status); 4271eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4280452661fSBarry Smith PetscFree(send_status); 4291eb62cbbSBarry Smith } 4300452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4311eb62cbbSBarry Smith 4321eb62cbbSBarry Smith return 0; 4331eb62cbbSBarry Smith } 4341eb62cbbSBarry Smith 435416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4361eb62cbbSBarry Smith { 437416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4381eb62cbbSBarry Smith int ierr; 439416022c9SBarry Smith 44064119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 441416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 44264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4441eb62cbbSBarry Smith return 0; 4451eb62cbbSBarry Smith } 4461eb62cbbSBarry Smith 447416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 448da3a660dSBarry Smith { 449416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 450da3a660dSBarry Smith int ierr; 45164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 452416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 45364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 455da3a660dSBarry Smith return 0; 456da3a660dSBarry Smith } 457da3a660dSBarry Smith 458416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 459da3a660dSBarry Smith { 460416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 461da3a660dSBarry Smith int ierr; 462da3a660dSBarry Smith 463da3a660dSBarry Smith /* do nondiagonal part */ 464416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 465da3a660dSBarry Smith /* send it on its way */ 466416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46764119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 468da3a660dSBarry Smith /* do local part */ 469416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 470da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 471da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 472da3a660dSBarry Smith /* but is not perhaps always true. */ 473416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 475da3a660dSBarry Smith return 0; 476da3a660dSBarry Smith } 477da3a660dSBarry Smith 478416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 479da3a660dSBarry Smith { 480416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 481da3a660dSBarry Smith int ierr; 482da3a660dSBarry Smith 483da3a660dSBarry Smith /* do nondiagonal part */ 484416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 485da3a660dSBarry Smith /* send it on its way */ 486416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48764119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 488da3a660dSBarry Smith /* do local part */ 489416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 490da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 491da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 492da3a660dSBarry Smith /* but is not perhaps always true. */ 493416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49464119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 495da3a660dSBarry Smith return 0; 496da3a660dSBarry Smith } 497da3a660dSBarry Smith 4981eb62cbbSBarry Smith /* 4991eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5001eb62cbbSBarry Smith diagonal block 5011eb62cbbSBarry Smith */ 502416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5031eb62cbbSBarry Smith { 504416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 505416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5061eb62cbbSBarry Smith } 5071eb62cbbSBarry Smith 508052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 509052efed2SBarry Smith { 510052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 511052efed2SBarry Smith int ierr; 512052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 513052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 514052efed2SBarry Smith return 0; 515052efed2SBarry Smith } 516052efed2SBarry Smith 51744a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5181eb62cbbSBarry Smith { 5191eb62cbbSBarry Smith Mat mat = (Mat) obj; 52044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5211eb62cbbSBarry Smith int ierr; 522a5a9c739SBarry Smith #if defined(PETSC_LOG) 5236e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 524a5a9c739SBarry Smith #endif 5250452661fSBarry Smith PetscFree(aij->rowners); 52678b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 52778b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5280452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5290452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5301eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 531a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5320452661fSBarry Smith PetscFree(aij); 533a5a9c739SBarry Smith PLogObjectDestroy(mat); 5340452661fSBarry Smith PetscHeaderDestroy(mat); 5351eb62cbbSBarry Smith return 0; 5361eb62cbbSBarry Smith } 5377857610eSBarry Smith #include "draw.h" 538b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 539ee50ffe9SBarry Smith 540416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5411eb62cbbSBarry Smith { 542416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 543416022c9SBarry Smith int ierr; 544416022c9SBarry Smith 54517699dbbSLois Curfman McInnes if (aij->size == 1) { 546416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 547416022c9SBarry Smith } 548a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 549416022c9SBarry Smith return 0; 550416022c9SBarry Smith } 551416022c9SBarry Smith 552d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 553416022c9SBarry Smith { 55444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 555dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 556a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 557d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 558d636dbe3SBarry Smith FILE *fd; 559416022c9SBarry Smith 560416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 561416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 56208480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 563a56f8943SBarry Smith int nz,nzalloc,mem; 564a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 565*227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 566a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 567a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 568a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 569a56f8943SBarry Smith nzalloc,mem); 57008480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 57108480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 57208480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 57308480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 574a56f8943SBarry Smith fflush(fd); 575a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 576a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 577416022c9SBarry Smith return 0; 578416022c9SBarry Smith } 57908480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 58008480c60SBarry Smith return 0; 58108480c60SBarry Smith } 582416022c9SBarry Smith } 583416022c9SBarry Smith 584416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 585*227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 5867f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 587d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 58817699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5891eb62cbbSBarry Smith aij->cend); 59078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 59178b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 592d13ab20cSBarry Smith fflush(fd); 5937f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 594d13ab20cSBarry Smith } 595416022c9SBarry Smith else { 596a56f8943SBarry Smith int size = aij->size; 597a56f8943SBarry Smith rank = aij->rank; 59817699dbbSLois Curfman McInnes if (size == 1) { 59978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 60095373324SBarry Smith } 60195373324SBarry Smith else { 60295373324SBarry Smith /* assemble the entire matrix onto first processor. */ 60395373324SBarry Smith Mat A; 604ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6052eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 60695373324SBarry Smith Scalar *a; 6072ee70a88SLois Curfman McInnes 60817699dbbSLois Curfman McInnes if (!rank) { 609b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 610c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61195373324SBarry Smith } 61295373324SBarry Smith else { 613b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 614c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61595373324SBarry Smith } 616464493b3SBarry Smith PLogObjectParent(mat,A); 617416022c9SBarry Smith 61895373324SBarry Smith /* copy over the A part */ 619ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6202ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62195373324SBarry Smith row = aij->rstart; 622dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 62395373324SBarry Smith for ( i=0; i<m; i++ ) { 624416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 62595373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 62695373324SBarry Smith } 6272ee70a88SLois Curfman McInnes aj = Aloc->j; 628dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 62995373324SBarry Smith 63095373324SBarry Smith /* copy over the B part */ 631ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6322ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 63395373324SBarry Smith row = aij->rstart; 6340452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 635dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 63695373324SBarry Smith for ( i=0; i<m; i++ ) { 637416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 63895373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 63995373324SBarry Smith } 6400452661fSBarry Smith PetscFree(ct); 64178b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64278b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64317699dbbSLois Curfman McInnes if (!rank) { 64478b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 64595373324SBarry Smith } 64678b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 64795373324SBarry Smith } 64895373324SBarry Smith } 6491eb62cbbSBarry Smith return 0; 6501eb62cbbSBarry Smith } 6511eb62cbbSBarry Smith 652416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 653416022c9SBarry Smith { 654416022c9SBarry Smith Mat mat = (Mat) obj; 655416022c9SBarry Smith int ierr; 656416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 657416022c9SBarry Smith 658416022c9SBarry Smith if (!viewer) { 659416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 660416022c9SBarry Smith } 661416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 662416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 663d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 664416022c9SBarry Smith } 665416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 666d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 667d7e8b826SBarry Smith } 668d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 669d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670416022c9SBarry Smith } 671416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 672d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673416022c9SBarry Smith } 674416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 675416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 676416022c9SBarry Smith } 677416022c9SBarry Smith return 0; 678416022c9SBarry Smith } 679416022c9SBarry Smith 680ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6811eb62cbbSBarry Smith /* 6821eb62cbbSBarry Smith This has to provide several versions. 6831eb62cbbSBarry Smith 6841eb62cbbSBarry Smith 1) per sequential 6851eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6861eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 687d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6881eb62cbbSBarry Smith */ 689fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 690dbb450caSBarry Smith double fshift,int its,Vec xx) 6918a729477SBarry Smith { 69244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 693d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 694ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6956abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6966abc6512SBarry Smith int ierr,*idx, *diag; 697416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 698da3a660dSBarry Smith Vec tt; 6998a729477SBarry Smith 700d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 701dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 702dbb450caSBarry Smith ls = ls + shift; 703ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 704d6dfbf8fSBarry Smith diag = A->diag; 705acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 70648d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 707acb40c82SBarry Smith } 708da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 709da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 710da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 711da3a660dSBarry Smith 712da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 713da3a660dSBarry Smith 714da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 715da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 716da3a660dSBarry Smith is the relaxation factor. 717da3a660dSBarry Smith */ 71878b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 719464493b3SBarry Smith PLogObjectParent(matin,tt); 720da3a660dSBarry Smith VecGetArray(tt,&t); 721da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 722da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 723da3a660dSBarry Smith VecSet(&zero,mat->lvec); 72464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 72578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 726da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 727da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 728dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 729dbb450caSBarry Smith v = A->a + diag[i] + !shift; 730da3a660dSBarry Smith sum = b[i]; 731da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 732dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 733da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 734dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 735dbb450caSBarry Smith v = B->a + B->i[i] + shift; 736da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 737da3a660dSBarry Smith x[i] = omega*(sum/d); 738da3a660dSBarry Smith } 73964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 741da3a660dSBarry Smith 742da3a660dSBarry Smith /* t = b - (2*E - D)x */ 743da3a660dSBarry Smith v = A->a; 744dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 745da3a660dSBarry Smith 746da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 747dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 748da3a660dSBarry Smith diag = A->diag; 749da3a660dSBarry Smith VecSet(&zero,mat->lvec); 75064119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 752da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 753da3a660dSBarry Smith n = diag[i] - A->i[i]; 754dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 755dbb450caSBarry Smith v = A->a + A->i[i] + shift; 756da3a660dSBarry Smith sum = t[i]; 757da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 758dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 759da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 760dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 761dbb450caSBarry Smith v = B->a + B->i[i] + shift; 762da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 763da3a660dSBarry Smith t[i] = omega*(sum/d); 764da3a660dSBarry Smith } 76564119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 76678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 767da3a660dSBarry Smith /* x = x + t */ 768da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 769da3a660dSBarry Smith VecDestroy(tt); 770da3a660dSBarry Smith return 0; 771da3a660dSBarry Smith } 772da3a660dSBarry Smith 7731eb62cbbSBarry Smith 774d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 775da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 776da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 777da3a660dSBarry Smith } 778da3a660dSBarry Smith else { 77964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78078b31e54SBarry Smith CHKERRQ(ierr); 78164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78278b31e54SBarry Smith CHKERRQ(ierr); 783da3a660dSBarry Smith } 784d6dfbf8fSBarry Smith while (its--) { 785d6dfbf8fSBarry Smith /* go down through the rows */ 78664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 788d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 789d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 790dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 791dbb450caSBarry Smith v = A->a + A->i[i] + shift; 792d6dfbf8fSBarry Smith sum = b[i]; 793d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 794dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 795d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 796dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 797dbb450caSBarry Smith v = B->a + B->i[i] + shift; 798d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 799d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 800d6dfbf8fSBarry Smith } 80164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 803d6dfbf8fSBarry Smith /* come up through the rows */ 80464119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 80578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 806d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 807d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 808dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 809dbb450caSBarry Smith v = A->a + A->i[i] + shift; 810d6dfbf8fSBarry Smith sum = b[i]; 811d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 812dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 813d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 814dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 815dbb450caSBarry Smith v = B->a + B->i[i] + shift; 816d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 817d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 818d6dfbf8fSBarry Smith } 81964119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 821d6dfbf8fSBarry Smith } 822d6dfbf8fSBarry Smith } 823d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 824da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 825da3a660dSBarry Smith VecSet(&zero,mat->lvec); 82664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 82778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 828da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 829da3a660dSBarry Smith n = diag[i] - A->i[i]; 830dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 831dbb450caSBarry Smith v = A->a + A->i[i] + shift; 832da3a660dSBarry Smith sum = b[i]; 833da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 834dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 835da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 836dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 837dbb450caSBarry Smith v = B->a + B->i[i] + shift; 838da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 839da3a660dSBarry Smith x[i] = omega*(sum/d); 840da3a660dSBarry Smith } 84164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 843da3a660dSBarry Smith its--; 844da3a660dSBarry Smith } 845d6dfbf8fSBarry Smith while (its--) { 84664119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84778b31e54SBarry Smith CHKERRQ(ierr); 84864119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84978b31e54SBarry Smith CHKERRQ(ierr); 85064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 852d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 853d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 854dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 855dbb450caSBarry Smith v = A->a + A->i[i] + shift; 856d6dfbf8fSBarry Smith sum = b[i]; 857d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 858dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 859d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 860dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 861dbb450caSBarry Smith v = B->a + B->i[i] + shift; 862d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 863d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 864d6dfbf8fSBarry Smith } 86564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 86678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 867d6dfbf8fSBarry Smith } 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 870da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 871da3a660dSBarry Smith VecSet(&zero,mat->lvec); 87264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 87378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 874da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 875da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 876dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 877dbb450caSBarry Smith v = A->a + diag[i] + !shift; 878da3a660dSBarry Smith sum = b[i]; 879da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 880dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 881da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 882dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 883dbb450caSBarry Smith v = B->a + B->i[i] + shift; 884da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 885da3a660dSBarry Smith x[i] = omega*(sum/d); 886da3a660dSBarry Smith } 88764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 889da3a660dSBarry Smith its--; 890da3a660dSBarry Smith } 891d6dfbf8fSBarry Smith while (its--) { 89264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 898d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 899d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 900dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 901dbb450caSBarry Smith v = A->a + A->i[i] + shift; 902d6dfbf8fSBarry Smith sum = b[i]; 903d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 904dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 905d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 906dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 907dbb450caSBarry Smith v = B->a + B->i[i] + shift; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 909d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 910d6dfbf8fSBarry Smith } 91164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 913d6dfbf8fSBarry Smith } 914d6dfbf8fSBarry Smith } 915d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 916da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 917dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 918da3a660dSBarry Smith } 91964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92078b31e54SBarry Smith CHKERRQ(ierr); 92164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92278b31e54SBarry Smith CHKERRQ(ierr); 923d6dfbf8fSBarry Smith while (its--) { 924d6dfbf8fSBarry Smith /* go down through the rows */ 925d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 926d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 927dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 928dbb450caSBarry Smith v = A->a + A->i[i] + shift; 929d6dfbf8fSBarry Smith sum = b[i]; 930d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 931dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 932d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 933dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 934dbb450caSBarry Smith v = B->a + B->i[i] + shift; 935d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 936d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 937d6dfbf8fSBarry Smith } 938d6dfbf8fSBarry Smith /* come up through the rows */ 939d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 940d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 941dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 942dbb450caSBarry Smith v = A->a + A->i[i] + shift; 943d6dfbf8fSBarry Smith sum = b[i]; 944d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 945dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 946d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 947dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 948dbb450caSBarry Smith v = B->a + B->i[i] + shift; 949d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 950d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 951d6dfbf8fSBarry Smith } 952d6dfbf8fSBarry Smith } 953d6dfbf8fSBarry Smith } 954d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 955da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 956dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 957da3a660dSBarry Smith } 95864119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 95978b31e54SBarry Smith CHKERRQ(ierr); 96064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96178b31e54SBarry Smith CHKERRQ(ierr); 962d6dfbf8fSBarry Smith while (its--) { 963d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 964d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 965dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 966dbb450caSBarry Smith v = A->a + A->i[i] + shift; 967d6dfbf8fSBarry Smith sum = b[i]; 968d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 969dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 970d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 971dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 972dbb450caSBarry Smith v = B->a + B->i[i] + shift; 973d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 974d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 975d6dfbf8fSBarry Smith } 976d6dfbf8fSBarry Smith } 977d6dfbf8fSBarry Smith } 978d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 979da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 980dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 981da3a660dSBarry Smith } 98264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 986d6dfbf8fSBarry Smith while (its--) { 987d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 988d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 989dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 990dbb450caSBarry Smith v = A->a + A->i[i] + shift; 991d6dfbf8fSBarry Smith sum = b[i]; 992d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 993dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 994d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 995dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 996dbb450caSBarry Smith v = B->a + B->i[i] + shift; 997d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 998d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 999d6dfbf8fSBarry Smith } 1000d6dfbf8fSBarry Smith } 1001d6dfbf8fSBarry Smith } 10028a729477SBarry Smith return 0; 10038a729477SBarry Smith } 1004a66be287SLois Curfman McInnes 1005fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1006fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1007a66be287SLois Curfman McInnes { 1008a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1009a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1010a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1011a66be287SLois Curfman McInnes 101278b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 101378b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1014a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1015a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1016a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1017a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1018d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1019a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1020a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1021d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1022a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1023a66be287SLois Curfman McInnes } 1024a66be287SLois Curfman McInnes return 0; 1025a66be287SLois Curfman McInnes } 1026a66be287SLois Curfman McInnes 1027299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1028299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1029299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1030299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1031299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1032299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1033299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1034299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1035299609e3SLois Curfman McInnes 1036416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1037c74985f6SBarry Smith { 1038c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1039c74985f6SBarry Smith 1040c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1041c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1042c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1043c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1044c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1045c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1046c74985f6SBarry Smith } 1047c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1048c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1049c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1050c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1051c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10524b0e389bSBarry Smith else if (op == COLUMN_ORIENTED) { 10534b0e389bSBarry Smith a->roworiented = 0; 10544b0e389bSBarry Smith MatSetOption(a->A,op); 10554b0e389bSBarry Smith MatSetOption(a->B,op); 10564b0e389bSBarry Smith } 1057c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10584d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1059c0bbcb79SLois Curfman McInnes else 10604d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1061c74985f6SBarry Smith return 0; 1062c74985f6SBarry Smith } 1063c74985f6SBarry Smith 1064d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1065c74985f6SBarry Smith { 106644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1067c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1068c74985f6SBarry Smith return 0; 1069c74985f6SBarry Smith } 1070c74985f6SBarry Smith 1071d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1072c74985f6SBarry Smith { 107344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1074b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1075c74985f6SBarry Smith return 0; 1076c74985f6SBarry Smith } 1077c74985f6SBarry Smith 1078d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1079c74985f6SBarry Smith { 108044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1081c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1082c74985f6SBarry Smith return 0; 1083c74985f6SBarry Smith } 1084c74985f6SBarry Smith 108539e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 108639e00950SLois Curfman McInnes { 1087154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1088154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1089154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1090154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 109139e00950SLois Curfman McInnes 1092b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1093abc0e9e4SLois Curfman McInnes lrow = row - rstart; 109439e00950SLois Curfman McInnes 1095154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1096154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1097154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 109878b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 109978b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1100154123eaSLois Curfman McInnes nztot = nzA + nzB; 1101154123eaSLois Curfman McInnes 1102154123eaSLois Curfman McInnes if (v || idx) { 1103154123eaSLois Curfman McInnes if (nztot) { 1104154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1105299609e3SLois Curfman McInnes int imark; 1106154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1107154123eaSLois Curfman McInnes if (v) { 11080452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 110939e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1110154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1111154123eaSLois Curfman McInnes else break; 1112154123eaSLois Curfman McInnes } 1113154123eaSLois Curfman McInnes imark = i; 1114154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1115299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1116154123eaSLois Curfman McInnes } 1117154123eaSLois Curfman McInnes if (idx) { 11180452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1119154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1120154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1121154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1122154123eaSLois Curfman McInnes else break; 1123154123eaSLois Curfman McInnes } 1124154123eaSLois Curfman McInnes imark = i; 1125154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1126299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 112739e00950SLois Curfman McInnes } 112839e00950SLois Curfman McInnes } 112939e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1130154123eaSLois Curfman McInnes } 113139e00950SLois Curfman McInnes *nz = nztot; 113278b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 113378b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 113439e00950SLois Curfman McInnes return 0; 113539e00950SLois Curfman McInnes } 113639e00950SLois Curfman McInnes 113739e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113839e00950SLois Curfman McInnes { 11390452661fSBarry Smith if (idx) PetscFree(*idx); 11400452661fSBarry Smith if (v) PetscFree(*v); 114139e00950SLois Curfman McInnes return 0; 114239e00950SLois Curfman McInnes } 114339e00950SLois Curfman McInnes 1144cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1145855ac2c5SLois Curfman McInnes { 1146855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1147ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1148416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1149416022c9SBarry Smith double sum = 0.0; 115004ca555eSLois Curfman McInnes Scalar *v; 115104ca555eSLois Curfman McInnes 115217699dbbSLois Curfman McInnes if (aij->size == 1) { 115314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 115437fa93a5SLois Curfman McInnes } else { 115504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 115604ca555eSLois Curfman McInnes v = amat->a; 115704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 115904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116004ca555eSLois Curfman McInnes #else 116104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116204ca555eSLois Curfman McInnes #endif 116304ca555eSLois Curfman McInnes } 116404ca555eSLois Curfman McInnes v = bmat->a; 116504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 116604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116804ca555eSLois Curfman McInnes #else 116904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117004ca555eSLois Curfman McInnes #endif 117104ca555eSLois Curfman McInnes } 117204ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 117304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 117404ca555eSLois Curfman McInnes } 117504ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 117604ca555eSLois Curfman McInnes double *tmp, *tmp2; 117704ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11780452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11790452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1180cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 118104ca555eSLois Curfman McInnes *norm = 0.0; 118204ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 118304ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1184579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 118504ca555eSLois Curfman McInnes } 118604ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118704ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1188579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 118904ca555eSLois Curfman McInnes } 119004ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 119104ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 119204ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 119304ca555eSLois Curfman McInnes } 11940452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 119504ca555eSLois Curfman McInnes } 119604ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1197515d9167SLois Curfman McInnes double ntemp = 0.0; 119804ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1199dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 120004ca555eSLois Curfman McInnes sum = 0.0; 120104ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1202cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120304ca555eSLois Curfman McInnes } 1204dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 120504ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1206cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120704ca555eSLois Curfman McInnes } 1208515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 120904ca555eSLois Curfman McInnes } 1210515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 121104ca555eSLois Curfman McInnes } 121204ca555eSLois Curfman McInnes else { 1213515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 121404ca555eSLois Curfman McInnes } 121537fa93a5SLois Curfman McInnes } 1216855ac2c5SLois Curfman McInnes return 0; 1217855ac2c5SLois Curfman McInnes } 1218855ac2c5SLois Curfman McInnes 12190de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1220b7c46309SBarry Smith { 1221b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1222dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1223416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1224416022c9SBarry Smith Mat B; 1225b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1226b7c46309SBarry Smith Scalar *array; 1227b7c46309SBarry Smith 1228416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1229b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1230b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1231b7c46309SBarry Smith 1232b7c46309SBarry Smith /* copy over the A part */ 1233ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1234b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1235b7c46309SBarry Smith row = a->rstart; 1236dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1237b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1238416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1239b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1240b7c46309SBarry Smith } 1241b7c46309SBarry Smith aj = Aloc->j; 1242dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1243b7c46309SBarry Smith 1244b7c46309SBarry Smith /* copy over the B part */ 1245ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1246b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1247b7c46309SBarry Smith row = a->rstart; 12480452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1249dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1250b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1251416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1252b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1253b7c46309SBarry Smith } 12540452661fSBarry Smith PetscFree(ct); 1255b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1256b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12570de55854SLois Curfman McInnes if (matout) { 12580de55854SLois Curfman McInnes *matout = B; 12590de55854SLois Curfman McInnes } else { 12600de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12610452661fSBarry Smith PetscFree(a->rowners); 12620de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12630de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12640452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12650452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12660de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1267a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12680452661fSBarry Smith PetscFree(a); 1269416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12700452661fSBarry Smith PetscHeaderDestroy(B); 12710de55854SLois Curfman McInnes } 1272b7c46309SBarry Smith return 0; 1273b7c46309SBarry Smith } 1274b7c46309SBarry Smith 1275682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1276682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1277682d7d0cSBarry Smith { 1278682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1279682d7d0cSBarry Smith 1280682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1281682d7d0cSBarry Smith else return 0; 1282682d7d0cSBarry Smith } 1283682d7d0cSBarry Smith 1284fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12853d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12862f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12878a729477SBarry Smith /* -------------------------------------------------------------------*/ 12882ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 128939e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 129044a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 129144a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1292299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1293299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1294299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 129544a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1296b7c46309SBarry Smith MatTranspose_MPIAIJ, 1297a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1298855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1299ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13001eb62cbbSBarry Smith 0, 1301299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1302299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1303299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1304d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1305299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13063d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1307b49de8d1SLois Curfman McInnes 0,0,0, 13087fab95ffSSatish Balay 0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1309052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1310052efed2SBarry Smith MatScale_MPIAIJ}; 13118a729477SBarry Smith 13121987afe7SBarry Smith /*@C 1313ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1314ff756334SLois Curfman McInnes (the default parallel PETSc format). 13158a729477SBarry Smith 13168a729477SBarry Smith Input Parameters: 13171eb62cbbSBarry Smith . comm - MPI communicator 13187d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13197d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13207d3e4905SLois Curfman McInnes if N is given) 13217d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13227d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13237d3e4905SLois Curfman McInnes if n is given) 1324ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1325ff756334SLois Curfman McInnes (same for all local rows) 1326ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1327ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1328ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1329ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1330ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1331ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1332ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13338a729477SBarry Smith 1334ff756334SLois Curfman McInnes Output Parameter: 13358a729477SBarry Smith . newmat - the matrix 13368a729477SBarry Smith 1337ff756334SLois Curfman McInnes Notes: 1338ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1339ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13400002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13410002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1342ff756334SLois Curfman McInnes 1343ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1344ff756334SLois Curfman McInnes (possibly both). 1345ff756334SLois Curfman McInnes 1346e0245417SLois Curfman McInnes Storage Information: 1347e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1348e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1349e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1350e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1351e0245417SLois Curfman McInnes 1352e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13535ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13545ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13555ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13565ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1357ff756334SLois Curfman McInnes 1358c451ab25SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 1359c451ab25SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 1360c451ab25SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 1361c451ab25SLois Curfman McInnes 1362c451ab25SLois Curfman McInnes Options Database Keys: 1363c451ab25SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 1364c451ab25SLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1365c451ab25SLois Curfman McInnes 1366dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1367ff756334SLois Curfman McInnes 1368fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13698a729477SBarry Smith @*/ 13701eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13711eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 13728a729477SBarry Smith { 13738a729477SBarry Smith Mat mat; 1374416022c9SBarry Smith Mat_MPIAIJ *a; 13756abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1376416022c9SBarry Smith 13778a729477SBarry Smith *newmat = 0; 13780452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1379a5a9c739SBarry Smith PLogObjectCreate(mat); 13800452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1381416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 138244a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 138344a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 13848a729477SBarry Smith mat->factor = 0; 1385c456f294SBarry Smith mat->assembled = PETSC_FALSE; 1386d6dfbf8fSBarry Smith 138764119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 138817699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 138917699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 13901eb62cbbSBarry Smith 1391b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 13921987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 13931987afe7SBarry Smith 1394dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13951eb62cbbSBarry Smith work[0] = m; work[1] = n; 1396d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1397dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1398dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13991eb62cbbSBarry Smith } 140017699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 140117699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1402416022c9SBarry Smith a->m = m; 1403416022c9SBarry Smith a->n = n; 1404416022c9SBarry Smith a->N = N; 1405416022c9SBarry Smith a->M = M; 14061eb62cbbSBarry Smith 14071eb62cbbSBarry Smith /* build local table of row and column ownerships */ 14080452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1409579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 141017699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1411416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1412416022c9SBarry Smith a->rowners[0] = 0; 141317699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1414416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 14158a729477SBarry Smith } 141617699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 141717699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1418416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1419416022c9SBarry Smith a->cowners[0] = 0; 142017699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1421416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14228a729477SBarry Smith } 142317699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 142417699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14258a729477SBarry Smith 14265ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1427416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1428416022c9SBarry Smith PLogObjectParent(mat,a->A); 14297b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1430416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1431416022c9SBarry Smith PLogObjectParent(mat,a->B); 14328a729477SBarry Smith 14331eb62cbbSBarry Smith /* build cache for off array entries formed */ 1434416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1435416022c9SBarry Smith a->colmap = 0; 1436416022c9SBarry Smith a->garray = 0; 14374b0e389bSBarry Smith a->roworiented = 1; 14388a729477SBarry Smith 14391eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1440416022c9SBarry Smith a->lvec = 0; 1441416022c9SBarry Smith a->Mvctx = 0; 14428a729477SBarry Smith 1443d6dfbf8fSBarry Smith *newmat = mat; 1444d6dfbf8fSBarry Smith return 0; 1445d6dfbf8fSBarry Smith } 1446c74985f6SBarry Smith 14473d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1448d6dfbf8fSBarry Smith { 1449d6dfbf8fSBarry Smith Mat mat; 1450416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 145125cdf11fSBarry Smith int ierr, len,flg; 1452d6dfbf8fSBarry Smith 1453416022c9SBarry Smith *newmat = 0; 14540452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1455a5a9c739SBarry Smith PLogObjectCreate(mat); 14560452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1457416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 145844a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 145944a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1460d6dfbf8fSBarry Smith mat->factor = matin->factor; 1461c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1462d6dfbf8fSBarry Smith 1463416022c9SBarry Smith a->m = oldmat->m; 1464416022c9SBarry Smith a->n = oldmat->n; 1465416022c9SBarry Smith a->M = oldmat->M; 1466416022c9SBarry Smith a->N = oldmat->N; 1467d6dfbf8fSBarry Smith 1468416022c9SBarry Smith a->rstart = oldmat->rstart; 1469416022c9SBarry Smith a->rend = oldmat->rend; 1470416022c9SBarry Smith a->cstart = oldmat->cstart; 1471416022c9SBarry Smith a->cend = oldmat->cend; 147217699dbbSLois Curfman McInnes a->size = oldmat->size; 147317699dbbSLois Curfman McInnes a->rank = oldmat->rank; 147464119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1475d6dfbf8fSBarry Smith 14760452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 147717699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 147817699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1479416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14802ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14810452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1482416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1483416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1484416022c9SBarry Smith } else a->colmap = 0; 1485ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 14860452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1487464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1488416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1489416022c9SBarry Smith } else a->garray = 0; 1490d6dfbf8fSBarry Smith 1491416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1492416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1493a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1494416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1495416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1496416022c9SBarry Smith PLogObjectParent(mat,a->A); 1497416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1498416022c9SBarry Smith PLogObjectParent(mat,a->B); 14995dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 150025cdf11fSBarry Smith if (flg) { 1501682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1502682d7d0cSBarry Smith } 15038a729477SBarry Smith *newmat = mat; 15048a729477SBarry Smith return 0; 15058a729477SBarry Smith } 1506416022c9SBarry Smith 1507416022c9SBarry Smith #include "sysio.h" 1508416022c9SBarry Smith 1509c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1510416022c9SBarry Smith { 1511d65a2f8fSBarry Smith Mat A; 1512416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1513d65a2f8fSBarry Smith Scalar *vals,*svals; 1514416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1515416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1516416022c9SBarry Smith MPI_Status status; 151717699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1518d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1519d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1520416022c9SBarry Smith 152117699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 152217699dbbSLois Curfman McInnes if (!rank) { 1523416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1524416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1525c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1526416022c9SBarry Smith } 1527416022c9SBarry Smith 1528416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1529416022c9SBarry Smith M = header[1]; N = header[2]; 1530416022c9SBarry Smith /* determine ownership of all rows */ 153117699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15320452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1533416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1534416022c9SBarry Smith rowners[0] = 0; 153517699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1536416022c9SBarry Smith rowners[i] += rowners[i-1]; 1537416022c9SBarry Smith } 153817699dbbSLois Curfman McInnes rstart = rowners[rank]; 153917699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1540416022c9SBarry Smith 1541416022c9SBarry Smith /* distribute row lengths to all processors */ 15420452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1543416022c9SBarry Smith offlens = ourlens + (rend-rstart); 154417699dbbSLois Curfman McInnes if (!rank) { 15450452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1546416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15470452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 154817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1549d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15500452661fSBarry Smith PetscFree(sndcounts); 1551416022c9SBarry Smith } 1552416022c9SBarry Smith else { 1553416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1554416022c9SBarry Smith } 1555416022c9SBarry Smith 155617699dbbSLois Curfman McInnes if (!rank) { 1557416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15580452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1559cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 156017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1561416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1562416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1563416022c9SBarry Smith } 1564416022c9SBarry Smith } 15650452661fSBarry Smith PetscFree(rowlengths); 1566416022c9SBarry Smith 1567416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1568416022c9SBarry Smith maxnz = 0; 156917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15700452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1571416022c9SBarry Smith } 15720452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1573416022c9SBarry Smith 1574416022c9SBarry Smith /* read in my part of the matrix column indices */ 1575416022c9SBarry Smith nz = procsnz[0]; 15760452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1577d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1578d65a2f8fSBarry Smith 1579d65a2f8fSBarry Smith /* read in every one elses and ship off */ 158017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1581d65a2f8fSBarry Smith nz = procsnz[i]; 1582416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1583d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1584d65a2f8fSBarry Smith } 15850452661fSBarry Smith PetscFree(cols); 1586416022c9SBarry Smith } 1587416022c9SBarry Smith else { 1588416022c9SBarry Smith /* determine buffer space needed for message */ 1589416022c9SBarry Smith nz = 0; 1590416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1591416022c9SBarry Smith nz += ourlens[i]; 1592416022c9SBarry Smith } 15930452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1594416022c9SBarry Smith 1595416022c9SBarry Smith /* receive message of column indices*/ 1596d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1597416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1598c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1599416022c9SBarry Smith } 1600416022c9SBarry Smith 1601416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1602cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1603416022c9SBarry Smith jj = 0; 1604416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1605416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1606d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1607416022c9SBarry Smith jj++; 1608416022c9SBarry Smith } 1609416022c9SBarry Smith } 1610d65a2f8fSBarry Smith 1611d65a2f8fSBarry Smith /* create our matrix */ 1612416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1613416022c9SBarry Smith ourlens[i] -= offlens[i]; 1614416022c9SBarry Smith } 1615d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1616d65a2f8fSBarry Smith A = *newmat; 1617d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1618d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1619d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1620d65a2f8fSBarry Smith } 1621416022c9SBarry Smith 162217699dbbSLois Curfman McInnes if (!rank) { 16230452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1624416022c9SBarry Smith 1625416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1626416022c9SBarry Smith nz = procsnz[0]; 1627416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1628d65a2f8fSBarry Smith 1629d65a2f8fSBarry Smith /* insert into matrix */ 1630d65a2f8fSBarry Smith jj = rstart; 1631d65a2f8fSBarry Smith smycols = mycols; 1632d65a2f8fSBarry Smith svals = vals; 1633d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1634d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1635d65a2f8fSBarry Smith smycols += ourlens[i]; 1636d65a2f8fSBarry Smith svals += ourlens[i]; 1637d65a2f8fSBarry Smith jj++; 1638416022c9SBarry Smith } 1639416022c9SBarry Smith 1640d65a2f8fSBarry Smith /* read in other processors and ship out */ 164117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1642416022c9SBarry Smith nz = procsnz[i]; 1643416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1644d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1645416022c9SBarry Smith } 16460452661fSBarry Smith PetscFree(procsnz); 1647416022c9SBarry Smith } 1648d65a2f8fSBarry Smith else { 1649d65a2f8fSBarry Smith /* receive numeric values */ 16500452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1651416022c9SBarry Smith 1652d65a2f8fSBarry Smith /* receive message of values*/ 1653d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1654d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1655c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1656d65a2f8fSBarry Smith 1657d65a2f8fSBarry Smith /* insert into matrix */ 1658d65a2f8fSBarry Smith jj = rstart; 1659d65a2f8fSBarry Smith smycols = mycols; 1660d65a2f8fSBarry Smith svals = vals; 1661d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1662d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1663d65a2f8fSBarry Smith smycols += ourlens[i]; 1664d65a2f8fSBarry Smith svals += ourlens[i]; 1665d65a2f8fSBarry Smith jj++; 1666d65a2f8fSBarry Smith } 1667d65a2f8fSBarry Smith } 16680452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1669d65a2f8fSBarry Smith 1670d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1671d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1672416022c9SBarry Smith return 0; 1673416022c9SBarry Smith } 1674