1cb512458SBarry Smith #ifndef lint 2*b810aeb4SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.118 1996/01/31 17:34:55 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 117b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118b49de8d1SLois Curfman McInnes } 119b49de8d1SLois Curfman McInnes } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes else { 122b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes return 0; 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1341eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1356abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1361eb62cbbSBarry Smith InsertMode addv; 1371eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 140d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 142bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1451eb62cbbSBarry Smith 1461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1490452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1501eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1511eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1531eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1541eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1558a729477SBarry Smith } 1568a729477SBarry Smith } 1578a729477SBarry Smith } 15817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1610452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16317699dbbSLois Curfman McInnes nreceives = work[rank]; 16417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16517699dbbSLois Curfman McInnes nmax = work[rank]; 1660452661fSBarry Smith PetscFree(work); 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith /* post receives: 1691eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1701eb62cbbSBarry Smith (global index,value) we store the global index as a double 171d6dfbf8fSBarry Smith to simplify the message passing. 1721eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1731eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1741eb62cbbSBarry Smith this is a lot of wasted space. 1751eb62cbbSBarry Smith 1761eb62cbbSBarry Smith 1771eb62cbbSBarry Smith This could be done better. 1781eb62cbbSBarry Smith */ 1790452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18078b31e54SBarry Smith CHKPTRQ(rvalues); 1810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18278b31e54SBarry Smith CHKPTRQ(recv_waits); 1831eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 184416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1851eb62cbbSBarry Smith comm,recv_waits+i); 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* do sends: 1891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1901eb62cbbSBarry Smith the ith processor 1911eb62cbbSBarry Smith */ 1920452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19478b31e54SBarry Smith CHKPTRQ(send_waits); 1950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1961eb62cbbSBarry Smith starts[0] = 0; 19717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2021eb62cbbSBarry Smith } 2030452661fSBarry Smith PetscFree(owner); 2041eb62cbbSBarry Smith starts[0] = 0; 20517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2061eb62cbbSBarry Smith count = 0; 20717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2081eb62cbbSBarry Smith if (procs[i]) { 209416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2101eb62cbbSBarry Smith comm,send_waits+count++); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith } 2130452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* Free cache space */ 2162191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 21778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2181eb62cbbSBarry Smith 2191eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2201eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2211eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2221eb62cbbSBarry Smith aij->rmax = nmax; 2231eb62cbbSBarry Smith 2241eb62cbbSBarry Smith return 0; 2251eb62cbbSBarry Smith } 22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2271eb62cbbSBarry Smith 228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2291eb62cbbSBarry Smith { 23044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2321eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 233416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2351eb62cbbSBarry Smith Scalar *values,val; 2361eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2371eb62cbbSBarry Smith 2381eb62cbbSBarry Smith /* wait on receives */ 2391eb62cbbSBarry Smith while (count) { 240d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2411eb62cbbSBarry Smith /* unpack receives into our local space */ 242d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 243c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2441eb62cbbSBarry Smith n = n/3; 2451eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 246227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 247227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2481eb62cbbSBarry Smith val = values[3*i+2]; 2491eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2501eb62cbbSBarry Smith col -= aij->cstart; 2511eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2521eb62cbbSBarry Smith } 2531eb62cbbSBarry Smith else { 254227d817aSBarry Smith if (mat->was_assembled) { 255b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256dbb450caSBarry Smith col = aij->colmap[col] + shift; 257ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2582493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 260d6dfbf8fSBarry Smith } 2619e25ed09SBarry Smith } 2621eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith } 2651eb62cbbSBarry Smith count--; 2661eb62cbbSBarry Smith } 2670452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2681eb62cbbSBarry Smith 2691eb62cbbSBarry Smith /* wait on sends */ 2701eb62cbbSBarry Smith if (aij->nsends) { 2710452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27278b31e54SBarry Smith CHKPTRQ(send_status); 2731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2740452661fSBarry Smith PetscFree(send_status); 2751eb62cbbSBarry Smith } 2760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2771eb62cbbSBarry Smith 27864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 27978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2811eb62cbbSBarry Smith 2822493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2832493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 284227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2862493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2872493cbb0SBarry Smith } 2882493cbb0SBarry Smith 289227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 29078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2915e42470aSBarry Smith } 29278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2945e42470aSBarry Smith 2958a729477SBarry Smith return 0; 2968a729477SBarry Smith } 2978a729477SBarry Smith 2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2991eb62cbbSBarry Smith { 30044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301dbd7a890SLois Curfman McInnes int ierr; 30278b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30378b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3041eb62cbbSBarry Smith return 0; 3051eb62cbbSBarry Smith } 3061eb62cbbSBarry Smith 3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3081eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3091eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3101eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3111eb62cbbSBarry Smith routine. 3121eb62cbbSBarry Smith */ 31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3141eb62cbbSBarry Smith { 31544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 31617699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3176abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 31817699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3195392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 320d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 321d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3221eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3231eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3241eb62cbbSBarry Smith IS istmp; 3251eb62cbbSBarry Smith 32678b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 32778b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3281eb62cbbSBarry Smith 3291eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3300452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3320452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3331eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3341eb62cbbSBarry Smith idx = rows[i]; 3351eb62cbbSBarry Smith found = 0; 33617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3371eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3381eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3391eb62cbbSBarry Smith } 3401eb62cbbSBarry Smith } 341bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3421eb62cbbSBarry Smith } 34317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3441eb62cbbSBarry Smith 3451eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3460452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 34717699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34817699dbbSLois Curfman McInnes nrecvs = work[rank]; 34917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35017699dbbSLois Curfman McInnes nmax = work[rank]; 3510452661fSBarry Smith PetscFree(work); 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* post receives: */ 3540452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 35578b31e54SBarry Smith CHKPTRQ(rvalues); 3560452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 35778b31e54SBarry Smith CHKPTRQ(recv_waits); 3581eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 359416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3601eb62cbbSBarry Smith } 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* do sends: 3631eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3641eb62cbbSBarry Smith the ith processor 3651eb62cbbSBarry Smith */ 3660452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3670452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36878b31e54SBarry Smith CHKPTRQ(send_waits); 3690452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3701eb62cbbSBarry Smith starts[0] = 0; 37117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3721eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3731eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3741eb62cbbSBarry Smith } 3751eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith starts[0] = 0; 37817699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3791eb62cbbSBarry Smith count = 0; 38017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3811eb62cbbSBarry Smith if (procs[i]) { 382416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3831eb62cbbSBarry Smith } 3841eb62cbbSBarry Smith } 3850452661fSBarry Smith PetscFree(starts); 3861eb62cbbSBarry Smith 38717699dbbSLois Curfman McInnes base = owners[rank]; 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* wait on receives */ 3900452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3911eb62cbbSBarry Smith source = lens + nrecvs; 3921eb62cbbSBarry Smith count = nrecvs; slen = 0; 3931eb62cbbSBarry Smith while (count) { 394d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3951eb62cbbSBarry Smith /* unpack receives into our local space */ 3961eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 397d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 398d6dfbf8fSBarry Smith lens[imdex] = n; 3991eb62cbbSBarry Smith slen += n; 4001eb62cbbSBarry Smith count--; 4011eb62cbbSBarry Smith } 4020452661fSBarry Smith PetscFree(recv_waits); 4031eb62cbbSBarry Smith 4041eb62cbbSBarry Smith /* move the data into the send scatter */ 4050452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4061eb62cbbSBarry Smith count = 0; 4071eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4081eb62cbbSBarry Smith values = rvalues + i*nmax; 4091eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4101eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4111eb62cbbSBarry Smith } 4121eb62cbbSBarry Smith } 4130452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4140452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4151eb62cbbSBarry Smith 4161eb62cbbSBarry Smith /* actually zap the local rows */ 417416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418464493b3SBarry Smith PLogObjectParent(A,istmp); 4190452661fSBarry Smith PetscFree(lrows); 42078b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42178b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42278b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* wait on sends */ 4251eb62cbbSBarry Smith if (nsends) { 4260452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 42778b31e54SBarry Smith CHKPTRQ(send_status); 4281eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4290452661fSBarry Smith PetscFree(send_status); 4301eb62cbbSBarry Smith } 4310452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4321eb62cbbSBarry Smith 4331eb62cbbSBarry Smith return 0; 4341eb62cbbSBarry Smith } 4351eb62cbbSBarry Smith 436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4371eb62cbbSBarry Smith { 438416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4391eb62cbbSBarry Smith int ierr; 440416022c9SBarry Smith 44164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 44364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4451eb62cbbSBarry Smith return 0; 4461eb62cbbSBarry Smith } 4471eb62cbbSBarry Smith 448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449da3a660dSBarry Smith { 450416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451da3a660dSBarry Smith int ierr; 45264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 45464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460da3a660dSBarry Smith { 461416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 464da3a660dSBarry Smith /* do nondiagonal part */ 465416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466da3a660dSBarry Smith /* send it on its way */ 467416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469da3a660dSBarry Smith /* do local part */ 470416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 472da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 473da3a660dSBarry Smith /* but is not perhaps always true. */ 474416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476da3a660dSBarry Smith return 0; 477da3a660dSBarry Smith } 478da3a660dSBarry Smith 479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480da3a660dSBarry Smith { 481416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482da3a660dSBarry Smith int ierr; 483da3a660dSBarry Smith 484da3a660dSBarry Smith /* do nondiagonal part */ 485416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486da3a660dSBarry Smith /* send it on its way */ 487416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489da3a660dSBarry Smith /* do local part */ 490416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 492da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 493da3a660dSBarry Smith /* but is not perhaps always true. */ 494416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496da3a660dSBarry Smith return 0; 497da3a660dSBarry Smith } 498da3a660dSBarry Smith 4991eb62cbbSBarry Smith /* 5001eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5011eb62cbbSBarry Smith diagonal block 5021eb62cbbSBarry Smith */ 503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5041eb62cbbSBarry Smith { 505416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith 509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510052efed2SBarry Smith { 511052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512052efed2SBarry Smith int ierr; 513052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515052efed2SBarry Smith return 0; 516052efed2SBarry Smith } 517052efed2SBarry Smith 51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5191eb62cbbSBarry Smith { 5201eb62cbbSBarry Smith Mat mat = (Mat) obj; 52144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5221eb62cbbSBarry Smith int ierr; 523a5a9c739SBarry Smith #if defined(PETSC_LOG) 5246e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525a5a9c739SBarry Smith #endif 5260452661fSBarry Smith PetscFree(aij->rowners); 52778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 52878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5290452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5300452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5311eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 532a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5330452661fSBarry Smith PetscFree(aij); 534a5a9c739SBarry Smith PLogObjectDestroy(mat); 5350452661fSBarry Smith PetscHeaderDestroy(mat); 5361eb62cbbSBarry Smith return 0; 5371eb62cbbSBarry Smith } 5387857610eSBarry Smith #include "draw.h" 539b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 540ee50ffe9SBarry Smith 541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5421eb62cbbSBarry Smith { 543416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544416022c9SBarry Smith int ierr; 545416022c9SBarry Smith 54617699dbbSLois Curfman McInnes if (aij->size == 1) { 547416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548416022c9SBarry Smith } 549a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550416022c9SBarry Smith return 0; 551416022c9SBarry Smith } 552416022c9SBarry Smith 553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554416022c9SBarry Smith { 55544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 558d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 559d636dbe3SBarry Smith FILE *fd; 560416022c9SBarry Smith 561416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 56308480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 564a56f8943SBarry Smith int nz,nzalloc,mem; 565a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 566227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 569a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570a56f8943SBarry Smith nzalloc,mem); 57108480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 57208480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 57308480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 57408480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575a56f8943SBarry Smith fflush(fd); 576a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 577a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 578416022c9SBarry Smith return 0; 579416022c9SBarry Smith } 58008480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 58108480c60SBarry Smith return 0; 58208480c60SBarry Smith } 583416022c9SBarry Smith } 584416022c9SBarry Smith 585416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 586227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 5877f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 588d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 58917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5901eb62cbbSBarry Smith aij->cend); 59178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 59278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593d13ab20cSBarry Smith fflush(fd); 5947f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 595d13ab20cSBarry Smith } 596416022c9SBarry Smith else { 597a56f8943SBarry Smith int size = aij->size; 598a56f8943SBarry Smith rank = aij->rank; 59917699dbbSLois Curfman McInnes if (size == 1) { 60078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 60195373324SBarry Smith } 60295373324SBarry Smith else { 60395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 60495373324SBarry Smith Mat A; 605ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6062eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 60795373324SBarry Smith Scalar *a; 6082ee70a88SLois Curfman McInnes 60917699dbbSLois Curfman McInnes if (!rank) { 610b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61295373324SBarry Smith } 61395373324SBarry Smith else { 614b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61695373324SBarry Smith } 617464493b3SBarry Smith PLogObjectParent(mat,A); 618416022c9SBarry Smith 61995373324SBarry Smith /* copy over the A part */ 620ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6212ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62295373324SBarry Smith row = aij->rstart; 623dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 62495373324SBarry Smith for ( i=0; i<m; i++ ) { 625416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 62695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 62795373324SBarry Smith } 6282ee70a88SLois Curfman McInnes aj = Aloc->j; 629dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 63095373324SBarry Smith 63195373324SBarry Smith /* copy over the B part */ 632ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6332ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 63495373324SBarry Smith row = aij->rstart; 6350452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 63795373324SBarry Smith for ( i=0; i<m; i++ ) { 638416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 63995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 64095373324SBarry Smith } 6410452661fSBarry Smith PetscFree(ct); 64278b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64378b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64417699dbbSLois Curfman McInnes if (!rank) { 64578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 64695373324SBarry Smith } 64778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 64895373324SBarry Smith } 64995373324SBarry Smith } 6501eb62cbbSBarry Smith return 0; 6511eb62cbbSBarry Smith } 6521eb62cbbSBarry Smith 653416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654416022c9SBarry Smith { 655416022c9SBarry Smith Mat mat = (Mat) obj; 656416022c9SBarry Smith int ierr; 657416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 658416022c9SBarry Smith 659416022c9SBarry Smith if (!viewer) { 660416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661416022c9SBarry Smith } 662416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665416022c9SBarry Smith } 666416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668d7e8b826SBarry Smith } 669d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671416022c9SBarry Smith } 672416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 673d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674416022c9SBarry Smith } 675416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 676416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 677416022c9SBarry Smith } 678416022c9SBarry Smith return 0; 679416022c9SBarry Smith } 680416022c9SBarry Smith 681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6821eb62cbbSBarry Smith /* 6831eb62cbbSBarry Smith This has to provide several versions. 6841eb62cbbSBarry Smith 6851eb62cbbSBarry Smith 1) per sequential 6861eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6871eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 688d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6891eb62cbbSBarry Smith */ 690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691dbb450caSBarry Smith double fshift,int its,Vec xx) 6928a729477SBarry Smith { 69344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 695ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6966abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6976abc6512SBarry Smith int ierr,*idx, *diag; 698416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 699da3a660dSBarry Smith Vec tt; 7008a729477SBarry Smith 701d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 703dbb450caSBarry Smith ls = ls + shift; 704ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705d6dfbf8fSBarry Smith diag = A->diag; 706acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 70748d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708acb40c82SBarry Smith } 709da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 710da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 711da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 712da3a660dSBarry Smith 713da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 714da3a660dSBarry Smith 715da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 716da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 717da3a660dSBarry Smith is the relaxation factor. 718da3a660dSBarry Smith */ 71978b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720464493b3SBarry Smith PLogObjectParent(matin,tt); 721da3a660dSBarry Smith VecGetArray(tt,&t); 722da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 723da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 724da3a660dSBarry Smith VecSet(&zero,mat->lvec); 72564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 72678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 727da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 728da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 729dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 730dbb450caSBarry Smith v = A->a + diag[i] + !shift; 731da3a660dSBarry Smith sum = b[i]; 732da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 733dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 734da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 735dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 736dbb450caSBarry Smith v = B->a + B->i[i] + shift; 737da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 738da3a660dSBarry Smith x[i] = omega*(sum/d); 739da3a660dSBarry Smith } 74064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 742da3a660dSBarry Smith 743da3a660dSBarry Smith /* t = b - (2*E - D)x */ 744da3a660dSBarry Smith v = A->a; 745dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746da3a660dSBarry Smith 747da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 748dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749da3a660dSBarry Smith diag = A->diag; 750da3a660dSBarry Smith VecSet(&zero,mat->lvec); 75164119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 753da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 754da3a660dSBarry Smith n = diag[i] - A->i[i]; 755dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 756dbb450caSBarry Smith v = A->a + A->i[i] + shift; 757da3a660dSBarry Smith sum = t[i]; 758da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 759dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 760da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 761dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 762dbb450caSBarry Smith v = B->a + B->i[i] + shift; 763da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 764da3a660dSBarry Smith t[i] = omega*(sum/d); 765da3a660dSBarry Smith } 76664119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768da3a660dSBarry Smith /* x = x + t */ 769da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770da3a660dSBarry Smith VecDestroy(tt); 771da3a660dSBarry Smith return 0; 772da3a660dSBarry Smith } 773da3a660dSBarry Smith 7741eb62cbbSBarry Smith 775d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 777da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778da3a660dSBarry Smith } 779da3a660dSBarry Smith else { 78064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78178b31e54SBarry Smith CHKERRQ(ierr); 78264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78378b31e54SBarry Smith CHKERRQ(ierr); 784da3a660dSBarry Smith } 785d6dfbf8fSBarry Smith while (its--) { 786d6dfbf8fSBarry Smith /* go down through the rows */ 78764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 789d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 790d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 791dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 792dbb450caSBarry Smith v = A->a + A->i[i] + shift; 793d6dfbf8fSBarry Smith sum = b[i]; 794d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 795dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 796d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 797dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 798dbb450caSBarry Smith v = B->a + B->i[i] + shift; 799d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 800d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801d6dfbf8fSBarry Smith } 80264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 804d6dfbf8fSBarry Smith /* come up through the rows */ 80564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 80678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 807d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 808d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 809dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 810dbb450caSBarry Smith v = A->a + A->i[i] + shift; 811d6dfbf8fSBarry Smith sum = b[i]; 812d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 813dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 814d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 815dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 816dbb450caSBarry Smith v = B->a + B->i[i] + shift; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 818d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819d6dfbf8fSBarry Smith } 82064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 822d6dfbf8fSBarry Smith } 823d6dfbf8fSBarry Smith } 824d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 825da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 826da3a660dSBarry Smith VecSet(&zero,mat->lvec); 82764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 830da3a660dSBarry Smith n = diag[i] - A->i[i]; 831dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 832dbb450caSBarry Smith v = A->a + A->i[i] + shift; 833da3a660dSBarry Smith sum = b[i]; 834da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 835dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 836da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 837dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 838dbb450caSBarry Smith v = B->a + B->i[i] + shift; 839da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 840da3a660dSBarry Smith x[i] = omega*(sum/d); 841da3a660dSBarry Smith } 84264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 844da3a660dSBarry Smith its--; 845da3a660dSBarry Smith } 846d6dfbf8fSBarry Smith while (its--) { 84764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84878b31e54SBarry Smith CHKERRQ(ierr); 84964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 85078b31e54SBarry Smith CHKERRQ(ierr); 85164119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 853d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 854d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 855dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 856dbb450caSBarry Smith v = A->a + A->i[i] + shift; 857d6dfbf8fSBarry Smith sum = b[i]; 858d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 859dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 860d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 861dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 862dbb450caSBarry Smith v = B->a + B->i[i] + shift; 863d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 864d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865d6dfbf8fSBarry Smith } 86664119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 86778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith VecSet(&zero,mat->lvec); 87364119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 87478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 875da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 876da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 877dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 878dbb450caSBarry Smith v = A->a + diag[i] + !shift; 879da3a660dSBarry Smith sum = b[i]; 880da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 881dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 882da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 883dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 884dbb450caSBarry Smith v = B->a + B->i[i] + shift; 885da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 886da3a660dSBarry Smith x[i] = omega*(sum/d); 887da3a660dSBarry Smith } 88864119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 890da3a660dSBarry Smith its--; 891da3a660dSBarry Smith } 892d6dfbf8fSBarry Smith while (its--) { 89364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 899d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 900d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 901dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 902dbb450caSBarry Smith v = A->a + A->i[i] + shift; 903d6dfbf8fSBarry Smith sum = b[i]; 904d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 905dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 906d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 907dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 908dbb450caSBarry Smith v = B->a + B->i[i] + shift; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 910d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911d6dfbf8fSBarry Smith } 91264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 914d6dfbf8fSBarry Smith } 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 918dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919da3a660dSBarry Smith } 92064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92178b31e54SBarry Smith CHKERRQ(ierr); 92264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92378b31e54SBarry Smith CHKERRQ(ierr); 924d6dfbf8fSBarry Smith while (its--) { 925d6dfbf8fSBarry Smith /* go down through the rows */ 926d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 927d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 928dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 929dbb450caSBarry Smith v = A->a + A->i[i] + shift; 930d6dfbf8fSBarry Smith sum = b[i]; 931d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 932dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 933d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 934dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 935dbb450caSBarry Smith v = B->a + B->i[i] + shift; 936d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 937d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938d6dfbf8fSBarry Smith } 939d6dfbf8fSBarry Smith /* come up through the rows */ 940d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 941d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 942dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 943dbb450caSBarry Smith v = A->a + A->i[i] + shift; 944d6dfbf8fSBarry Smith sum = b[i]; 945d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 946dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 947d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 948dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 949dbb450caSBarry Smith v = B->a + B->i[i] + shift; 950d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 951d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 952d6dfbf8fSBarry Smith } 953d6dfbf8fSBarry Smith } 954d6dfbf8fSBarry Smith } 955d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 957dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958da3a660dSBarry Smith } 95964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96078b31e54SBarry Smith CHKERRQ(ierr); 96164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96278b31e54SBarry Smith CHKERRQ(ierr); 963d6dfbf8fSBarry Smith while (its--) { 964d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 965d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 966dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 967dbb450caSBarry Smith v = A->a + A->i[i] + shift; 968d6dfbf8fSBarry Smith sum = b[i]; 969d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 970dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 971d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 972dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 973dbb450caSBarry Smith v = B->a + B->i[i] + shift; 974d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 975d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976d6dfbf8fSBarry Smith } 977d6dfbf8fSBarry Smith } 978d6dfbf8fSBarry Smith } 979d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 981dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982da3a660dSBarry Smith } 98364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 987d6dfbf8fSBarry Smith while (its--) { 988d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 989d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 990dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 991dbb450caSBarry Smith v = A->a + A->i[i] + shift; 992d6dfbf8fSBarry Smith sum = b[i]; 993d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 994dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 995d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 996dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 997dbb450caSBarry Smith v = B->a + B->i[i] + shift; 998d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 999d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000d6dfbf8fSBarry Smith } 1001d6dfbf8fSBarry Smith } 1002d6dfbf8fSBarry Smith } 10038a729477SBarry Smith return 0; 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 1006fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1008a66be287SLois Curfman McInnes { 1009a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1011a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012a66be287SLois Curfman McInnes 101378b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 101478b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1017a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1019d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1022d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024a66be287SLois Curfman McInnes } 1025a66be287SLois Curfman McInnes return 0; 1026a66be287SLois Curfman McInnes } 1027a66be287SLois Curfman McInnes 1028299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036299609e3SLois Curfman McInnes 1037416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038c74985f6SBarry Smith { 1039c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040c74985f6SBarry Smith 1041c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1042c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1043c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1044c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1045c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1046c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1047c74985f6SBarry Smith } 1048c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1049c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1050c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1052c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10534b0e389bSBarry Smith else if (op == COLUMN_ORIENTED) { 10544b0e389bSBarry Smith a->roworiented = 0; 10554b0e389bSBarry Smith MatSetOption(a->A,op); 10564b0e389bSBarry Smith MatSetOption(a->B,op); 10574b0e389bSBarry Smith } 1058c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10594d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060c0bbcb79SLois Curfman McInnes else 10614d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062c74985f6SBarry Smith return 0; 1063c74985f6SBarry Smith } 1064c74985f6SBarry Smith 1065d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066c74985f6SBarry Smith { 106744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1069c74985f6SBarry Smith return 0; 1070c74985f6SBarry Smith } 1071c74985f6SBarry Smith 1072d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073c74985f6SBarry Smith { 107444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1076c74985f6SBarry Smith return 0; 1077c74985f6SBarry Smith } 1078c74985f6SBarry Smith 1079d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080c74985f6SBarry Smith { 108144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1083c74985f6SBarry Smith return 0; 1084c74985f6SBarry Smith } 1085c74985f6SBarry Smith 108639e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 108739e00950SLois Curfman McInnes { 1088154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1090154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 109239e00950SLois Curfman McInnes 1093b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094abc0e9e4SLois Curfman McInnes lrow = row - rstart; 109539e00950SLois Curfman McInnes 1096154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1098154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 109978b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 110078b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101154123eaSLois Curfman McInnes nztot = nzA + nzB; 1102154123eaSLois Curfman McInnes 1103154123eaSLois Curfman McInnes if (v || idx) { 1104154123eaSLois Curfman McInnes if (nztot) { 1105154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1106299609e3SLois Curfman McInnes int imark; 1107154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108154123eaSLois Curfman McInnes if (v) { 11090452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 111039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1111154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112154123eaSLois Curfman McInnes else break; 1113154123eaSLois Curfman McInnes } 1114154123eaSLois Curfman McInnes imark = i; 1115154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117154123eaSLois Curfman McInnes } 1118154123eaSLois Curfman McInnes if (idx) { 11190452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1122154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123154123eaSLois Curfman McInnes else break; 1124154123eaSLois Curfman McInnes } 1125154123eaSLois Curfman McInnes imark = i; 1126154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 112839e00950SLois Curfman McInnes } 112939e00950SLois Curfman McInnes } 113039e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1131154123eaSLois Curfman McInnes } 113239e00950SLois Curfman McInnes *nz = nztot; 113378b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 113478b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 113539e00950SLois Curfman McInnes return 0; 113639e00950SLois Curfman McInnes } 113739e00950SLois Curfman McInnes 113839e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113939e00950SLois Curfman McInnes { 11400452661fSBarry Smith if (idx) PetscFree(*idx); 11410452661fSBarry Smith if (v) PetscFree(*v); 114239e00950SLois Curfman McInnes return 0; 114339e00950SLois Curfman McInnes } 114439e00950SLois Curfman McInnes 1145cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146855ac2c5SLois Curfman McInnes { 1147855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150416022c9SBarry Smith double sum = 0.0; 115104ca555eSLois Curfman McInnes Scalar *v; 115204ca555eSLois Curfman McInnes 115317699dbbSLois Curfman McInnes if (aij->size == 1) { 115414183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 115537fa93a5SLois Curfman McInnes } else { 115604ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 115704ca555eSLois Curfman McInnes v = amat->a; 115804ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116104ca555eSLois Curfman McInnes #else 116204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116304ca555eSLois Curfman McInnes #endif 116404ca555eSLois Curfman McInnes } 116504ca555eSLois Curfman McInnes v = bmat->a; 116604ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 116704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116904ca555eSLois Curfman McInnes #else 117004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117104ca555eSLois Curfman McInnes #endif 117204ca555eSLois Curfman McInnes } 117304ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 117404ca555eSLois Curfman McInnes *norm = sqrt(*norm); 117504ca555eSLois Curfman McInnes } 117604ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 117704ca555eSLois Curfman McInnes double *tmp, *tmp2; 117804ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11790452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11800452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 118204ca555eSLois Curfman McInnes *norm = 0.0; 118304ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 118404ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1185579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 118604ca555eSLois Curfman McInnes } 118704ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118804ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1189579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 119004ca555eSLois Curfman McInnes } 119104ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 119204ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 119304ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 119404ca555eSLois Curfman McInnes } 11950452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 119604ca555eSLois Curfman McInnes } 119704ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1198515d9167SLois Curfman McInnes double ntemp = 0.0; 119904ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1200dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 120104ca555eSLois Curfman McInnes sum = 0.0; 120204ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120404ca555eSLois Curfman McInnes } 1205dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 120604ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120804ca555eSLois Curfman McInnes } 1209515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 121004ca555eSLois Curfman McInnes } 1211515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 121204ca555eSLois Curfman McInnes } 121304ca555eSLois Curfman McInnes else { 1214515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 121504ca555eSLois Curfman McInnes } 121637fa93a5SLois Curfman McInnes } 1217855ac2c5SLois Curfman McInnes return 0; 1218855ac2c5SLois Curfman McInnes } 1219855ac2c5SLois Curfman McInnes 12200de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221b7c46309SBarry Smith { 1222b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1225416022c9SBarry Smith Mat B; 1226b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227b7c46309SBarry Smith Scalar *array; 1228b7c46309SBarry Smith 1229416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1230b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1231b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1232b7c46309SBarry Smith 1233b7c46309SBarry Smith /* copy over the A part */ 1234ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1235b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1236b7c46309SBarry Smith row = a->rstart; 1237dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1238b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1239416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1240b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1241b7c46309SBarry Smith } 1242b7c46309SBarry Smith aj = Aloc->j; 1243dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1244b7c46309SBarry Smith 1245b7c46309SBarry Smith /* copy over the B part */ 1246ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1247b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1248b7c46309SBarry Smith row = a->rstart; 12490452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1250dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1251b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1252416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1253b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1254b7c46309SBarry Smith } 12550452661fSBarry Smith PetscFree(ct); 1256b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1257b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12580de55854SLois Curfman McInnes if (matout) { 12590de55854SLois Curfman McInnes *matout = B; 12600de55854SLois Curfman McInnes } else { 12610de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12620452661fSBarry Smith PetscFree(a->rowners); 12630de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12640de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12650452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12660452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12670de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1268a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12690452661fSBarry Smith PetscFree(a); 1270416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12710452661fSBarry Smith PetscHeaderDestroy(B); 12720de55854SLois Curfman McInnes } 1273b7c46309SBarry Smith return 0; 1274b7c46309SBarry Smith } 1275b7c46309SBarry Smith 1276682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1277682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1278682d7d0cSBarry Smith { 1279682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1280682d7d0cSBarry Smith 1281682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1282682d7d0cSBarry Smith else return 0; 1283682d7d0cSBarry Smith } 1284682d7d0cSBarry Smith 1285fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12863d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12872f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12888a729477SBarry Smith /* -------------------------------------------------------------------*/ 12892ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 129039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 129144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 129244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1293299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1294299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1295299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 129644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1297b7c46309SBarry Smith MatTranspose_MPIAIJ, 1298a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1299855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1300ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13011eb62cbbSBarry Smith 0, 1302299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1303299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1304299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1305d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1306299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13073d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1308b49de8d1SLois Curfman McInnes 0,0,0, 13097fab95ffSSatish Balay 0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1310052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1311052efed2SBarry Smith MatScale_MPIAIJ}; 13128a729477SBarry Smith 13131987afe7SBarry Smith /*@C 1314ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1315*b810aeb4SBarry Smith (the default parallel PETSc format). For good performance you should 1316*b810aeb4SBarry Smith preallocate the matrix storage by setting the parameters d_nz or d_nzz and 1317*b810aeb4SBarry Smith o_nz or o_nzz. By setting these parameters accurately performance can 1318*b810aeb4SBarry Smith be increased by more then a factor of 50. 13198a729477SBarry Smith 13208a729477SBarry Smith Input Parameters: 13211eb62cbbSBarry Smith . comm - MPI communicator 13227d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13237d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13247d3e4905SLois Curfman McInnes if N is given) 13257d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13267d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13277d3e4905SLois Curfman McInnes if n is given) 1328ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1329ff756334SLois Curfman McInnes (same for all local rows) 1330ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1331ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1332ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1333ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1334ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1335ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1336ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13378a729477SBarry Smith 1338ff756334SLois Curfman McInnes Output Parameter: 13398a729477SBarry Smith . newmat - the matrix 13408a729477SBarry Smith 1341ff756334SLois Curfman McInnes Notes: 1342ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1343ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13440002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13450002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1346ff756334SLois Curfman McInnes 1347ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1348ff756334SLois Curfman McInnes (possibly both). 1349ff756334SLois Curfman McInnes 1350e0245417SLois Curfman McInnes Storage Information: 1351e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1352e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1353e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1354e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1355e0245417SLois Curfman McInnes 1356e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13575ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13585ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13595ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13605ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1361ff756334SLois Curfman McInnes 1362*b810aeb4SBarry Smith Consider a processor that owns rows 3, 4 and 5 (in this figure we depict 1363*b810aeb4SBarry Smith those three rows and all columns (0-11). 13642191d07cSBarry Smith 1365*b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1366*b810aeb4SBarry Smith $ ------------------- 1367*b810aeb4SBarry Smith $ 3 | o o o d d d o o o o o o 1368*b810aeb4SBarry Smith $ 4 | o o o d d d o o o o o o 1369*b810aeb4SBarry Smith $ 5 | o o o d d d o o o o o o 1370*b810aeb4SBarry Smith $ 1371*b810aeb4SBarry Smith Thus any entries in the d locations are stored in the d matrix and 1372*b810aeb4SBarry Smith any in the o locations are store in the o matrix. (The d and the o matrix 1373*b810aeb4SBarry Smith are simply SeqAIJ matrix (that is they store the matrices in compress 1374*b810aeb4SBarry Smith row storage)). 1375*b810aeb4SBarry Smith 1376*b810aeb4SBarry Smith Now d_nz should give the number of non zeros per row in the d matrix 1377*b810aeb4SBarry Smith and o_nz should give the number of nonzeros per row in the o matrix. 1378*b810aeb4SBarry Smith In general for PDE problems where most nonzeros are near the diagonal 1379*b810aeb4SBarry Smith one expects d_nz >> o_nz. See the users manual chapter on matrices for 1380*b810aeb4SBarry Smith more details. 13812191d07cSBarry Smith 1382c451ab25SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 1383c451ab25SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 1384c451ab25SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 1385c451ab25SLois Curfman McInnes 1386c451ab25SLois Curfman McInnes Options Database Keys: 1387c451ab25SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 1388c451ab25SLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1389c451ab25SLois Curfman McInnes 1390dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1391ff756334SLois Curfman McInnes 1392fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13938a729477SBarry Smith @*/ 13941eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13951eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 13968a729477SBarry Smith { 13978a729477SBarry Smith Mat mat; 1398416022c9SBarry Smith Mat_MPIAIJ *a; 13996abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1400416022c9SBarry Smith 14018a729477SBarry Smith *newmat = 0; 14020452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1403a5a9c739SBarry Smith PLogObjectCreate(mat); 14040452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1405416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 140644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 140744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 14088a729477SBarry Smith mat->factor = 0; 1409c456f294SBarry Smith mat->assembled = PETSC_FALSE; 1410d6dfbf8fSBarry Smith 141164119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 141217699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 141317699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 14141eb62cbbSBarry Smith 1415b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14161987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 14171987afe7SBarry Smith 1418dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14191eb62cbbSBarry Smith work[0] = m; work[1] = n; 1420d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1421dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1422dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14231eb62cbbSBarry Smith } 142417699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 142517699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1426416022c9SBarry Smith a->m = m; 1427416022c9SBarry Smith a->n = n; 1428416022c9SBarry Smith a->N = N; 1429416022c9SBarry Smith a->M = M; 14301eb62cbbSBarry Smith 14311eb62cbbSBarry Smith /* build local table of row and column ownerships */ 14320452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1433579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 143417699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1435416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1436416022c9SBarry Smith a->rowners[0] = 0; 143717699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1438416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 14398a729477SBarry Smith } 144017699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 144117699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1442416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1443416022c9SBarry Smith a->cowners[0] = 0; 144417699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1445416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14468a729477SBarry Smith } 144717699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 144817699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14498a729477SBarry Smith 14505ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1451416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1452416022c9SBarry Smith PLogObjectParent(mat,a->A); 14537b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1454416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1455416022c9SBarry Smith PLogObjectParent(mat,a->B); 14568a729477SBarry Smith 14571eb62cbbSBarry Smith /* build cache for off array entries formed */ 1458416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1459416022c9SBarry Smith a->colmap = 0; 1460416022c9SBarry Smith a->garray = 0; 14614b0e389bSBarry Smith a->roworiented = 1; 14628a729477SBarry Smith 14631eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1464416022c9SBarry Smith a->lvec = 0; 1465416022c9SBarry Smith a->Mvctx = 0; 14668a729477SBarry Smith 1467d6dfbf8fSBarry Smith *newmat = mat; 1468d6dfbf8fSBarry Smith return 0; 1469d6dfbf8fSBarry Smith } 1470c74985f6SBarry Smith 14713d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1472d6dfbf8fSBarry Smith { 1473d6dfbf8fSBarry Smith Mat mat; 1474416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 147525cdf11fSBarry Smith int ierr, len,flg; 1476d6dfbf8fSBarry Smith 1477416022c9SBarry Smith *newmat = 0; 14780452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1479a5a9c739SBarry Smith PLogObjectCreate(mat); 14800452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1481416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 148244a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 148344a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1484d6dfbf8fSBarry Smith mat->factor = matin->factor; 1485c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1486d6dfbf8fSBarry Smith 1487416022c9SBarry Smith a->m = oldmat->m; 1488416022c9SBarry Smith a->n = oldmat->n; 1489416022c9SBarry Smith a->M = oldmat->M; 1490416022c9SBarry Smith a->N = oldmat->N; 1491d6dfbf8fSBarry Smith 1492416022c9SBarry Smith a->rstart = oldmat->rstart; 1493416022c9SBarry Smith a->rend = oldmat->rend; 1494416022c9SBarry Smith a->cstart = oldmat->cstart; 1495416022c9SBarry Smith a->cend = oldmat->cend; 149617699dbbSLois Curfman McInnes a->size = oldmat->size; 149717699dbbSLois Curfman McInnes a->rank = oldmat->rank; 149864119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1499d6dfbf8fSBarry Smith 15000452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 150117699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 150217699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1503416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15042ee70a88SLois Curfman McInnes if (oldmat->colmap) { 15050452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1506416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1507416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1508416022c9SBarry Smith } else a->colmap = 0; 1509ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 15100452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1511464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1512416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1513416022c9SBarry Smith } else a->garray = 0; 1514d6dfbf8fSBarry Smith 1515416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1516416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1517a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1518416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1519416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1520416022c9SBarry Smith PLogObjectParent(mat,a->A); 1521416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1522416022c9SBarry Smith PLogObjectParent(mat,a->B); 15235dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 152425cdf11fSBarry Smith if (flg) { 1525682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1526682d7d0cSBarry Smith } 15278a729477SBarry Smith *newmat = mat; 15288a729477SBarry Smith return 0; 15298a729477SBarry Smith } 1530416022c9SBarry Smith 1531416022c9SBarry Smith #include "sysio.h" 1532416022c9SBarry Smith 1533c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1534416022c9SBarry Smith { 1535d65a2f8fSBarry Smith Mat A; 1536416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1537d65a2f8fSBarry Smith Scalar *vals,*svals; 1538416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1539416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1540416022c9SBarry Smith MPI_Status status; 154117699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1542d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1543d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1544416022c9SBarry Smith 154517699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 154617699dbbSLois Curfman McInnes if (!rank) { 1547416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1548416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1549c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1550416022c9SBarry Smith } 1551416022c9SBarry Smith 1552416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1553416022c9SBarry Smith M = header[1]; N = header[2]; 1554416022c9SBarry Smith /* determine ownership of all rows */ 155517699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15560452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1557416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1558416022c9SBarry Smith rowners[0] = 0; 155917699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1560416022c9SBarry Smith rowners[i] += rowners[i-1]; 1561416022c9SBarry Smith } 156217699dbbSLois Curfman McInnes rstart = rowners[rank]; 156317699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1564416022c9SBarry Smith 1565416022c9SBarry Smith /* distribute row lengths to all processors */ 15660452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1567416022c9SBarry Smith offlens = ourlens + (rend-rstart); 156817699dbbSLois Curfman McInnes if (!rank) { 15690452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1570416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15710452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 157217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1573d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15740452661fSBarry Smith PetscFree(sndcounts); 1575416022c9SBarry Smith } 1576416022c9SBarry Smith else { 1577416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1578416022c9SBarry Smith } 1579416022c9SBarry Smith 158017699dbbSLois Curfman McInnes if (!rank) { 1581416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15820452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1583cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 158417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1585416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1586416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1587416022c9SBarry Smith } 1588416022c9SBarry Smith } 15890452661fSBarry Smith PetscFree(rowlengths); 1590416022c9SBarry Smith 1591416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1592416022c9SBarry Smith maxnz = 0; 159317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15940452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1595416022c9SBarry Smith } 15960452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1597416022c9SBarry Smith 1598416022c9SBarry Smith /* read in my part of the matrix column indices */ 1599416022c9SBarry Smith nz = procsnz[0]; 16000452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1601d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1602d65a2f8fSBarry Smith 1603d65a2f8fSBarry Smith /* read in every one elses and ship off */ 160417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1605d65a2f8fSBarry Smith nz = procsnz[i]; 1606416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1607d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1608d65a2f8fSBarry Smith } 16090452661fSBarry Smith PetscFree(cols); 1610416022c9SBarry Smith } 1611416022c9SBarry Smith else { 1612416022c9SBarry Smith /* determine buffer space needed for message */ 1613416022c9SBarry Smith nz = 0; 1614416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1615416022c9SBarry Smith nz += ourlens[i]; 1616416022c9SBarry Smith } 16170452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1618416022c9SBarry Smith 1619416022c9SBarry Smith /* receive message of column indices*/ 1620d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1621416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1622c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1623416022c9SBarry Smith } 1624416022c9SBarry Smith 1625416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1626cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1627416022c9SBarry Smith jj = 0; 1628416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1629416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1630d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1631416022c9SBarry Smith jj++; 1632416022c9SBarry Smith } 1633416022c9SBarry Smith } 1634d65a2f8fSBarry Smith 1635d65a2f8fSBarry Smith /* create our matrix */ 1636416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1637416022c9SBarry Smith ourlens[i] -= offlens[i]; 1638416022c9SBarry Smith } 1639d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1640d65a2f8fSBarry Smith A = *newmat; 1641d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1642d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1643d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1644d65a2f8fSBarry Smith } 1645416022c9SBarry Smith 164617699dbbSLois Curfman McInnes if (!rank) { 16470452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1648416022c9SBarry Smith 1649416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1650416022c9SBarry Smith nz = procsnz[0]; 1651416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1652d65a2f8fSBarry Smith 1653d65a2f8fSBarry Smith /* insert into matrix */ 1654d65a2f8fSBarry Smith jj = rstart; 1655d65a2f8fSBarry Smith smycols = mycols; 1656d65a2f8fSBarry Smith svals = vals; 1657d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1658d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1659d65a2f8fSBarry Smith smycols += ourlens[i]; 1660d65a2f8fSBarry Smith svals += ourlens[i]; 1661d65a2f8fSBarry Smith jj++; 1662416022c9SBarry Smith } 1663416022c9SBarry Smith 1664d65a2f8fSBarry Smith /* read in other processors and ship out */ 166517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1666416022c9SBarry Smith nz = procsnz[i]; 1667416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1668d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1669416022c9SBarry Smith } 16700452661fSBarry Smith PetscFree(procsnz); 1671416022c9SBarry Smith } 1672d65a2f8fSBarry Smith else { 1673d65a2f8fSBarry Smith /* receive numeric values */ 16740452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1675416022c9SBarry Smith 1676d65a2f8fSBarry Smith /* receive message of values*/ 1677d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1678d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1679c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1680d65a2f8fSBarry Smith 1681d65a2f8fSBarry Smith /* insert into matrix */ 1682d65a2f8fSBarry Smith jj = rstart; 1683d65a2f8fSBarry Smith smycols = mycols; 1684d65a2f8fSBarry Smith svals = vals; 1685d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1686d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1687d65a2f8fSBarry Smith smycols += ourlens[i]; 1688d65a2f8fSBarry Smith svals += ourlens[i]; 1689d65a2f8fSBarry Smith jj++; 1690d65a2f8fSBarry Smith } 1691d65a2f8fSBarry Smith } 16920452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1693d65a2f8fSBarry Smith 1694d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1695d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1696416022c9SBarry Smith return 0; 1697416022c9SBarry Smith } 1698