1cb512458SBarry Smith #ifndef lint 2*2191d07cSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.117 1996/01/26 04:33:52 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 117b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118b49de8d1SLois Curfman McInnes } 119b49de8d1SLois Curfman McInnes } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes else { 122b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes return 0; 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1341eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1356abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1361eb62cbbSBarry Smith InsertMode addv; 1371eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 140d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 142bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1451eb62cbbSBarry Smith 1461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1490452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1501eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1511eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1531eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1541eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1558a729477SBarry Smith } 1568a729477SBarry Smith } 1578a729477SBarry Smith } 15817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1610452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16317699dbbSLois Curfman McInnes nreceives = work[rank]; 16417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16517699dbbSLois Curfman McInnes nmax = work[rank]; 1660452661fSBarry Smith PetscFree(work); 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith /* post receives: 1691eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1701eb62cbbSBarry Smith (global index,value) we store the global index as a double 171d6dfbf8fSBarry Smith to simplify the message passing. 1721eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1731eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1741eb62cbbSBarry Smith this is a lot of wasted space. 1751eb62cbbSBarry Smith 1761eb62cbbSBarry Smith 1771eb62cbbSBarry Smith This could be done better. 1781eb62cbbSBarry Smith */ 1790452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18078b31e54SBarry Smith CHKPTRQ(rvalues); 1810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18278b31e54SBarry Smith CHKPTRQ(recv_waits); 1831eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 184416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1851eb62cbbSBarry Smith comm,recv_waits+i); 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* do sends: 1891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1901eb62cbbSBarry Smith the ith processor 1911eb62cbbSBarry Smith */ 1920452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19478b31e54SBarry Smith CHKPTRQ(send_waits); 1950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1961eb62cbbSBarry Smith starts[0] = 0; 19717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2021eb62cbbSBarry Smith } 2030452661fSBarry Smith PetscFree(owner); 2041eb62cbbSBarry Smith starts[0] = 0; 20517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2061eb62cbbSBarry Smith count = 0; 20717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2081eb62cbbSBarry Smith if (procs[i]) { 209416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2101eb62cbbSBarry Smith comm,send_waits+count++); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith } 2130452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* Free cache space */ 216*2191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 21778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2181eb62cbbSBarry Smith 2191eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2201eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2211eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2221eb62cbbSBarry Smith aij->rmax = nmax; 2231eb62cbbSBarry Smith 2241eb62cbbSBarry Smith return 0; 2251eb62cbbSBarry Smith } 22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2271eb62cbbSBarry Smith 228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2291eb62cbbSBarry Smith { 23044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2321eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 233416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2351eb62cbbSBarry Smith Scalar *values,val; 2361eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2371eb62cbbSBarry Smith 2381eb62cbbSBarry Smith /* wait on receives */ 2391eb62cbbSBarry Smith while (count) { 240d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2411eb62cbbSBarry Smith /* unpack receives into our local space */ 242d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 243c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2441eb62cbbSBarry Smith n = n/3; 2451eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 246227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 247227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2481eb62cbbSBarry Smith val = values[3*i+2]; 2491eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2501eb62cbbSBarry Smith col -= aij->cstart; 2511eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2521eb62cbbSBarry Smith } 2531eb62cbbSBarry Smith else { 254227d817aSBarry Smith if (mat->was_assembled) { 255b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256dbb450caSBarry Smith col = aij->colmap[col] + shift; 257ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2582493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 260d6dfbf8fSBarry Smith } 2619e25ed09SBarry Smith } 2621eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith } 2651eb62cbbSBarry Smith count--; 2661eb62cbbSBarry Smith } 2670452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2681eb62cbbSBarry Smith 2691eb62cbbSBarry Smith /* wait on sends */ 2701eb62cbbSBarry Smith if (aij->nsends) { 2710452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27278b31e54SBarry Smith CHKPTRQ(send_status); 2731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2740452661fSBarry Smith PetscFree(send_status); 2751eb62cbbSBarry Smith } 2760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2771eb62cbbSBarry Smith 27864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 27978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2811eb62cbbSBarry Smith 2822493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2832493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 284227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2862493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2872493cbb0SBarry Smith } 2882493cbb0SBarry Smith 289227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 29078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2915e42470aSBarry Smith } 29278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2945e42470aSBarry Smith 2958a729477SBarry Smith return 0; 2968a729477SBarry Smith } 2978a729477SBarry Smith 2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2991eb62cbbSBarry Smith { 30044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301dbd7a890SLois Curfman McInnes int ierr; 30278b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30378b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3041eb62cbbSBarry Smith return 0; 3051eb62cbbSBarry Smith } 3061eb62cbbSBarry Smith 3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3081eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3091eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3101eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3111eb62cbbSBarry Smith routine. 3121eb62cbbSBarry Smith */ 31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3141eb62cbbSBarry Smith { 31544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 31617699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3176abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 31817699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3195392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 320d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 321d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3221eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3231eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3241eb62cbbSBarry Smith IS istmp; 3251eb62cbbSBarry Smith 32678b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 32778b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3281eb62cbbSBarry Smith 3291eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3300452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3320452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3331eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3341eb62cbbSBarry Smith idx = rows[i]; 3351eb62cbbSBarry Smith found = 0; 33617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3371eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3381eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3391eb62cbbSBarry Smith } 3401eb62cbbSBarry Smith } 341bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3421eb62cbbSBarry Smith } 34317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3441eb62cbbSBarry Smith 3451eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3460452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 34717699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34817699dbbSLois Curfman McInnes nrecvs = work[rank]; 34917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35017699dbbSLois Curfman McInnes nmax = work[rank]; 3510452661fSBarry Smith PetscFree(work); 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* post receives: */ 3540452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 35578b31e54SBarry Smith CHKPTRQ(rvalues); 3560452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 35778b31e54SBarry Smith CHKPTRQ(recv_waits); 3581eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 359416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3601eb62cbbSBarry Smith } 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* do sends: 3631eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3641eb62cbbSBarry Smith the ith processor 3651eb62cbbSBarry Smith */ 3660452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3670452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36878b31e54SBarry Smith CHKPTRQ(send_waits); 3690452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3701eb62cbbSBarry Smith starts[0] = 0; 37117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3721eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3731eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3741eb62cbbSBarry Smith } 3751eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith starts[0] = 0; 37817699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3791eb62cbbSBarry Smith count = 0; 38017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3811eb62cbbSBarry Smith if (procs[i]) { 382416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3831eb62cbbSBarry Smith } 3841eb62cbbSBarry Smith } 3850452661fSBarry Smith PetscFree(starts); 3861eb62cbbSBarry Smith 38717699dbbSLois Curfman McInnes base = owners[rank]; 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* wait on receives */ 3900452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3911eb62cbbSBarry Smith source = lens + nrecvs; 3921eb62cbbSBarry Smith count = nrecvs; slen = 0; 3931eb62cbbSBarry Smith while (count) { 394d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3951eb62cbbSBarry Smith /* unpack receives into our local space */ 3961eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 397d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 398d6dfbf8fSBarry Smith lens[imdex] = n; 3991eb62cbbSBarry Smith slen += n; 4001eb62cbbSBarry Smith count--; 4011eb62cbbSBarry Smith } 4020452661fSBarry Smith PetscFree(recv_waits); 4031eb62cbbSBarry Smith 4041eb62cbbSBarry Smith /* move the data into the send scatter */ 4050452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4061eb62cbbSBarry Smith count = 0; 4071eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4081eb62cbbSBarry Smith values = rvalues + i*nmax; 4091eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4101eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4111eb62cbbSBarry Smith } 4121eb62cbbSBarry Smith } 4130452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4140452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4151eb62cbbSBarry Smith 4161eb62cbbSBarry Smith /* actually zap the local rows */ 417416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418464493b3SBarry Smith PLogObjectParent(A,istmp); 4190452661fSBarry Smith PetscFree(lrows); 42078b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42178b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42278b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* wait on sends */ 4251eb62cbbSBarry Smith if (nsends) { 4260452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 42778b31e54SBarry Smith CHKPTRQ(send_status); 4281eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4290452661fSBarry Smith PetscFree(send_status); 4301eb62cbbSBarry Smith } 4310452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4321eb62cbbSBarry Smith 4331eb62cbbSBarry Smith return 0; 4341eb62cbbSBarry Smith } 4351eb62cbbSBarry Smith 436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4371eb62cbbSBarry Smith { 438416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4391eb62cbbSBarry Smith int ierr; 440416022c9SBarry Smith 44164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 44364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4451eb62cbbSBarry Smith return 0; 4461eb62cbbSBarry Smith } 4471eb62cbbSBarry Smith 448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449da3a660dSBarry Smith { 450416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451da3a660dSBarry Smith int ierr; 45264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 45464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460da3a660dSBarry Smith { 461416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 464da3a660dSBarry Smith /* do nondiagonal part */ 465416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466da3a660dSBarry Smith /* send it on its way */ 467416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469da3a660dSBarry Smith /* do local part */ 470416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 472da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 473da3a660dSBarry Smith /* but is not perhaps always true. */ 474416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476da3a660dSBarry Smith return 0; 477da3a660dSBarry Smith } 478da3a660dSBarry Smith 479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480da3a660dSBarry Smith { 481416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482da3a660dSBarry Smith int ierr; 483da3a660dSBarry Smith 484da3a660dSBarry Smith /* do nondiagonal part */ 485416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486da3a660dSBarry Smith /* send it on its way */ 487416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489da3a660dSBarry Smith /* do local part */ 490416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 492da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 493da3a660dSBarry Smith /* but is not perhaps always true. */ 494416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496da3a660dSBarry Smith return 0; 497da3a660dSBarry Smith } 498da3a660dSBarry Smith 4991eb62cbbSBarry Smith /* 5001eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5011eb62cbbSBarry Smith diagonal block 5021eb62cbbSBarry Smith */ 503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5041eb62cbbSBarry Smith { 505416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith 509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510052efed2SBarry Smith { 511052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512052efed2SBarry Smith int ierr; 513052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515052efed2SBarry Smith return 0; 516052efed2SBarry Smith } 517052efed2SBarry Smith 51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5191eb62cbbSBarry Smith { 5201eb62cbbSBarry Smith Mat mat = (Mat) obj; 52144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5221eb62cbbSBarry Smith int ierr; 523a5a9c739SBarry Smith #if defined(PETSC_LOG) 5246e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525a5a9c739SBarry Smith #endif 5260452661fSBarry Smith PetscFree(aij->rowners); 52778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 52878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5290452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5300452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5311eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 532a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5330452661fSBarry Smith PetscFree(aij); 534a5a9c739SBarry Smith PLogObjectDestroy(mat); 5350452661fSBarry Smith PetscHeaderDestroy(mat); 5361eb62cbbSBarry Smith return 0; 5371eb62cbbSBarry Smith } 5387857610eSBarry Smith #include "draw.h" 539b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 540ee50ffe9SBarry Smith 541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5421eb62cbbSBarry Smith { 543416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544416022c9SBarry Smith int ierr; 545416022c9SBarry Smith 54617699dbbSLois Curfman McInnes if (aij->size == 1) { 547416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548416022c9SBarry Smith } 549a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550416022c9SBarry Smith return 0; 551416022c9SBarry Smith } 552416022c9SBarry Smith 553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554416022c9SBarry Smith { 55544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 558d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 559d636dbe3SBarry Smith FILE *fd; 560416022c9SBarry Smith 561416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 56308480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 564a56f8943SBarry Smith int nz,nzalloc,mem; 565a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 566227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 569a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570a56f8943SBarry Smith nzalloc,mem); 57108480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 57208480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 57308480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 57408480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575a56f8943SBarry Smith fflush(fd); 576a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 577a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 578416022c9SBarry Smith return 0; 579416022c9SBarry Smith } 58008480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 58108480c60SBarry Smith return 0; 58208480c60SBarry Smith } 583416022c9SBarry Smith } 584416022c9SBarry Smith 585416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 586227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 5877f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 588d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 58917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5901eb62cbbSBarry Smith aij->cend); 59178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 59278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593d13ab20cSBarry Smith fflush(fd); 5947f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 595d13ab20cSBarry Smith } 596416022c9SBarry Smith else { 597a56f8943SBarry Smith int size = aij->size; 598a56f8943SBarry Smith rank = aij->rank; 59917699dbbSLois Curfman McInnes if (size == 1) { 60078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 60195373324SBarry Smith } 60295373324SBarry Smith else { 60395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 60495373324SBarry Smith Mat A; 605ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6062eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 60795373324SBarry Smith Scalar *a; 6082ee70a88SLois Curfman McInnes 60917699dbbSLois Curfman McInnes if (!rank) { 610b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61295373324SBarry Smith } 61395373324SBarry Smith else { 614b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61695373324SBarry Smith } 617464493b3SBarry Smith PLogObjectParent(mat,A); 618416022c9SBarry Smith 61995373324SBarry Smith /* copy over the A part */ 620ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6212ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62295373324SBarry Smith row = aij->rstart; 623dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 62495373324SBarry Smith for ( i=0; i<m; i++ ) { 625416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 62695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 62795373324SBarry Smith } 6282ee70a88SLois Curfman McInnes aj = Aloc->j; 629dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 63095373324SBarry Smith 63195373324SBarry Smith /* copy over the B part */ 632ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6332ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 63495373324SBarry Smith row = aij->rstart; 6350452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 63795373324SBarry Smith for ( i=0; i<m; i++ ) { 638416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 63995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 64095373324SBarry Smith } 6410452661fSBarry Smith PetscFree(ct); 64278b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64378b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64417699dbbSLois Curfman McInnes if (!rank) { 64578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 64695373324SBarry Smith } 64778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 64895373324SBarry Smith } 64995373324SBarry Smith } 6501eb62cbbSBarry Smith return 0; 6511eb62cbbSBarry Smith } 6521eb62cbbSBarry Smith 653416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654416022c9SBarry Smith { 655416022c9SBarry Smith Mat mat = (Mat) obj; 656416022c9SBarry Smith int ierr; 657416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 658416022c9SBarry Smith 659416022c9SBarry Smith if (!viewer) { 660416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661416022c9SBarry Smith } 662416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665416022c9SBarry Smith } 666416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668d7e8b826SBarry Smith } 669d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671416022c9SBarry Smith } 672416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 673d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674416022c9SBarry Smith } 675416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 676416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 677416022c9SBarry Smith } 678416022c9SBarry Smith return 0; 679416022c9SBarry Smith } 680416022c9SBarry Smith 681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6821eb62cbbSBarry Smith /* 6831eb62cbbSBarry Smith This has to provide several versions. 6841eb62cbbSBarry Smith 6851eb62cbbSBarry Smith 1) per sequential 6861eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6871eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 688d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6891eb62cbbSBarry Smith */ 690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691dbb450caSBarry Smith double fshift,int its,Vec xx) 6928a729477SBarry Smith { 69344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 695ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6966abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6976abc6512SBarry Smith int ierr,*idx, *diag; 698416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 699da3a660dSBarry Smith Vec tt; 7008a729477SBarry Smith 701d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 703dbb450caSBarry Smith ls = ls + shift; 704ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705d6dfbf8fSBarry Smith diag = A->diag; 706acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 70748d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708acb40c82SBarry Smith } 709da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 710da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 711da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 712da3a660dSBarry Smith 713da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 714da3a660dSBarry Smith 715da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 716da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 717da3a660dSBarry Smith is the relaxation factor. 718da3a660dSBarry Smith */ 71978b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720464493b3SBarry Smith PLogObjectParent(matin,tt); 721da3a660dSBarry Smith VecGetArray(tt,&t); 722da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 723da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 724da3a660dSBarry Smith VecSet(&zero,mat->lvec); 72564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 72678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 727da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 728da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 729dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 730dbb450caSBarry Smith v = A->a + diag[i] + !shift; 731da3a660dSBarry Smith sum = b[i]; 732da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 733dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 734da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 735dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 736dbb450caSBarry Smith v = B->a + B->i[i] + shift; 737da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 738da3a660dSBarry Smith x[i] = omega*(sum/d); 739da3a660dSBarry Smith } 74064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 742da3a660dSBarry Smith 743da3a660dSBarry Smith /* t = b - (2*E - D)x */ 744da3a660dSBarry Smith v = A->a; 745dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746da3a660dSBarry Smith 747da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 748dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749da3a660dSBarry Smith diag = A->diag; 750da3a660dSBarry Smith VecSet(&zero,mat->lvec); 75164119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 753da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 754da3a660dSBarry Smith n = diag[i] - A->i[i]; 755dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 756dbb450caSBarry Smith v = A->a + A->i[i] + shift; 757da3a660dSBarry Smith sum = t[i]; 758da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 759dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 760da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 761dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 762dbb450caSBarry Smith v = B->a + B->i[i] + shift; 763da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 764da3a660dSBarry Smith t[i] = omega*(sum/d); 765da3a660dSBarry Smith } 76664119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768da3a660dSBarry Smith /* x = x + t */ 769da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770da3a660dSBarry Smith VecDestroy(tt); 771da3a660dSBarry Smith return 0; 772da3a660dSBarry Smith } 773da3a660dSBarry Smith 7741eb62cbbSBarry Smith 775d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 777da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778da3a660dSBarry Smith } 779da3a660dSBarry Smith else { 78064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78178b31e54SBarry Smith CHKERRQ(ierr); 78264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78378b31e54SBarry Smith CHKERRQ(ierr); 784da3a660dSBarry Smith } 785d6dfbf8fSBarry Smith while (its--) { 786d6dfbf8fSBarry Smith /* go down through the rows */ 78764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 789d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 790d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 791dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 792dbb450caSBarry Smith v = A->a + A->i[i] + shift; 793d6dfbf8fSBarry Smith sum = b[i]; 794d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 795dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 796d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 797dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 798dbb450caSBarry Smith v = B->a + B->i[i] + shift; 799d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 800d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801d6dfbf8fSBarry Smith } 80264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 804d6dfbf8fSBarry Smith /* come up through the rows */ 80564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 80678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 807d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 808d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 809dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 810dbb450caSBarry Smith v = A->a + A->i[i] + shift; 811d6dfbf8fSBarry Smith sum = b[i]; 812d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 813dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 814d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 815dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 816dbb450caSBarry Smith v = B->a + B->i[i] + shift; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 818d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819d6dfbf8fSBarry Smith } 82064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 822d6dfbf8fSBarry Smith } 823d6dfbf8fSBarry Smith } 824d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 825da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 826da3a660dSBarry Smith VecSet(&zero,mat->lvec); 82764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 830da3a660dSBarry Smith n = diag[i] - A->i[i]; 831dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 832dbb450caSBarry Smith v = A->a + A->i[i] + shift; 833da3a660dSBarry Smith sum = b[i]; 834da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 835dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 836da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 837dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 838dbb450caSBarry Smith v = B->a + B->i[i] + shift; 839da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 840da3a660dSBarry Smith x[i] = omega*(sum/d); 841da3a660dSBarry Smith } 84264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 844da3a660dSBarry Smith its--; 845da3a660dSBarry Smith } 846d6dfbf8fSBarry Smith while (its--) { 84764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84878b31e54SBarry Smith CHKERRQ(ierr); 84964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 85078b31e54SBarry Smith CHKERRQ(ierr); 85164119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 853d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 854d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 855dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 856dbb450caSBarry Smith v = A->a + A->i[i] + shift; 857d6dfbf8fSBarry Smith sum = b[i]; 858d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 859dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 860d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 861dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 862dbb450caSBarry Smith v = B->a + B->i[i] + shift; 863d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 864d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865d6dfbf8fSBarry Smith } 86664119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 86778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith VecSet(&zero,mat->lvec); 87364119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 87478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 875da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 876da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 877dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 878dbb450caSBarry Smith v = A->a + diag[i] + !shift; 879da3a660dSBarry Smith sum = b[i]; 880da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 881dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 882da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 883dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 884dbb450caSBarry Smith v = B->a + B->i[i] + shift; 885da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 886da3a660dSBarry Smith x[i] = omega*(sum/d); 887da3a660dSBarry Smith } 88864119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 890da3a660dSBarry Smith its--; 891da3a660dSBarry Smith } 892d6dfbf8fSBarry Smith while (its--) { 89364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 899d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 900d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 901dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 902dbb450caSBarry Smith v = A->a + A->i[i] + shift; 903d6dfbf8fSBarry Smith sum = b[i]; 904d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 905dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 906d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 907dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 908dbb450caSBarry Smith v = B->a + B->i[i] + shift; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 910d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911d6dfbf8fSBarry Smith } 91264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 914d6dfbf8fSBarry Smith } 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 918dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919da3a660dSBarry Smith } 92064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92178b31e54SBarry Smith CHKERRQ(ierr); 92264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92378b31e54SBarry Smith CHKERRQ(ierr); 924d6dfbf8fSBarry Smith while (its--) { 925d6dfbf8fSBarry Smith /* go down through the rows */ 926d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 927d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 928dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 929dbb450caSBarry Smith v = A->a + A->i[i] + shift; 930d6dfbf8fSBarry Smith sum = b[i]; 931d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 932dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 933d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 934dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 935dbb450caSBarry Smith v = B->a + B->i[i] + shift; 936d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 937d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938d6dfbf8fSBarry Smith } 939d6dfbf8fSBarry Smith /* come up through the rows */ 940d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 941d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 942dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 943dbb450caSBarry Smith v = A->a + A->i[i] + shift; 944d6dfbf8fSBarry Smith sum = b[i]; 945d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 946dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 947d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 948dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 949dbb450caSBarry Smith v = B->a + B->i[i] + shift; 950d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 951d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 952d6dfbf8fSBarry Smith } 953d6dfbf8fSBarry Smith } 954d6dfbf8fSBarry Smith } 955d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 957dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958da3a660dSBarry Smith } 95964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96078b31e54SBarry Smith CHKERRQ(ierr); 96164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96278b31e54SBarry Smith CHKERRQ(ierr); 963d6dfbf8fSBarry Smith while (its--) { 964d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 965d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 966dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 967dbb450caSBarry Smith v = A->a + A->i[i] + shift; 968d6dfbf8fSBarry Smith sum = b[i]; 969d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 970dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 971d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 972dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 973dbb450caSBarry Smith v = B->a + B->i[i] + shift; 974d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 975d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976d6dfbf8fSBarry Smith } 977d6dfbf8fSBarry Smith } 978d6dfbf8fSBarry Smith } 979d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 981dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982da3a660dSBarry Smith } 98364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 987d6dfbf8fSBarry Smith while (its--) { 988d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 989d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 990dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 991dbb450caSBarry Smith v = A->a + A->i[i] + shift; 992d6dfbf8fSBarry Smith sum = b[i]; 993d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 994dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 995d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 996dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 997dbb450caSBarry Smith v = B->a + B->i[i] + shift; 998d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 999d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000d6dfbf8fSBarry Smith } 1001d6dfbf8fSBarry Smith } 1002d6dfbf8fSBarry Smith } 10038a729477SBarry Smith return 0; 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 1006fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1008a66be287SLois Curfman McInnes { 1009a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1011a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012a66be287SLois Curfman McInnes 101378b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 101478b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1017a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1019d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1022d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024a66be287SLois Curfman McInnes } 1025a66be287SLois Curfman McInnes return 0; 1026a66be287SLois Curfman McInnes } 1027a66be287SLois Curfman McInnes 1028299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036299609e3SLois Curfman McInnes 1037416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038c74985f6SBarry Smith { 1039c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040c74985f6SBarry Smith 1041c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1042c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1043c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1044c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1045c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1046c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1047c74985f6SBarry Smith } 1048c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1049c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1050c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1052c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10534b0e389bSBarry Smith else if (op == COLUMN_ORIENTED) { 10544b0e389bSBarry Smith a->roworiented = 0; 10554b0e389bSBarry Smith MatSetOption(a->A,op); 10564b0e389bSBarry Smith MatSetOption(a->B,op); 10574b0e389bSBarry Smith } 1058c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10594d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060c0bbcb79SLois Curfman McInnes else 10614d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062c74985f6SBarry Smith return 0; 1063c74985f6SBarry Smith } 1064c74985f6SBarry Smith 1065d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066c74985f6SBarry Smith { 106744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1069c74985f6SBarry Smith return 0; 1070c74985f6SBarry Smith } 1071c74985f6SBarry Smith 1072d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073c74985f6SBarry Smith { 107444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1076c74985f6SBarry Smith return 0; 1077c74985f6SBarry Smith } 1078c74985f6SBarry Smith 1079d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080c74985f6SBarry Smith { 108144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1083c74985f6SBarry Smith return 0; 1084c74985f6SBarry Smith } 1085c74985f6SBarry Smith 108639e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 108739e00950SLois Curfman McInnes { 1088154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1090154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 109239e00950SLois Curfman McInnes 1093b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094abc0e9e4SLois Curfman McInnes lrow = row - rstart; 109539e00950SLois Curfman McInnes 1096154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1098154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 109978b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 110078b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101154123eaSLois Curfman McInnes nztot = nzA + nzB; 1102154123eaSLois Curfman McInnes 1103154123eaSLois Curfman McInnes if (v || idx) { 1104154123eaSLois Curfman McInnes if (nztot) { 1105154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1106299609e3SLois Curfman McInnes int imark; 1107154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108154123eaSLois Curfman McInnes if (v) { 11090452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 111039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1111154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112154123eaSLois Curfman McInnes else break; 1113154123eaSLois Curfman McInnes } 1114154123eaSLois Curfman McInnes imark = i; 1115154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117154123eaSLois Curfman McInnes } 1118154123eaSLois Curfman McInnes if (idx) { 11190452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1122154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123154123eaSLois Curfman McInnes else break; 1124154123eaSLois Curfman McInnes } 1125154123eaSLois Curfman McInnes imark = i; 1126154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 112839e00950SLois Curfman McInnes } 112939e00950SLois Curfman McInnes } 113039e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1131154123eaSLois Curfman McInnes } 113239e00950SLois Curfman McInnes *nz = nztot; 113378b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 113478b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 113539e00950SLois Curfman McInnes return 0; 113639e00950SLois Curfman McInnes } 113739e00950SLois Curfman McInnes 113839e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113939e00950SLois Curfman McInnes { 11400452661fSBarry Smith if (idx) PetscFree(*idx); 11410452661fSBarry Smith if (v) PetscFree(*v); 114239e00950SLois Curfman McInnes return 0; 114339e00950SLois Curfman McInnes } 114439e00950SLois Curfman McInnes 1145cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146855ac2c5SLois Curfman McInnes { 1147855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150416022c9SBarry Smith double sum = 0.0; 115104ca555eSLois Curfman McInnes Scalar *v; 115204ca555eSLois Curfman McInnes 115317699dbbSLois Curfman McInnes if (aij->size == 1) { 115414183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 115537fa93a5SLois Curfman McInnes } else { 115604ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 115704ca555eSLois Curfman McInnes v = amat->a; 115804ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116104ca555eSLois Curfman McInnes #else 116204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116304ca555eSLois Curfman McInnes #endif 116404ca555eSLois Curfman McInnes } 116504ca555eSLois Curfman McInnes v = bmat->a; 116604ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 116704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116904ca555eSLois Curfman McInnes #else 117004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117104ca555eSLois Curfman McInnes #endif 117204ca555eSLois Curfman McInnes } 117304ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 117404ca555eSLois Curfman McInnes *norm = sqrt(*norm); 117504ca555eSLois Curfman McInnes } 117604ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 117704ca555eSLois Curfman McInnes double *tmp, *tmp2; 117804ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11790452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11800452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 118204ca555eSLois Curfman McInnes *norm = 0.0; 118304ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 118404ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1185579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 118604ca555eSLois Curfman McInnes } 118704ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118804ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1189579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 119004ca555eSLois Curfman McInnes } 119104ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 119204ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 119304ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 119404ca555eSLois Curfman McInnes } 11950452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 119604ca555eSLois Curfman McInnes } 119704ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1198515d9167SLois Curfman McInnes double ntemp = 0.0; 119904ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1200dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 120104ca555eSLois Curfman McInnes sum = 0.0; 120204ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120404ca555eSLois Curfman McInnes } 1205dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 120604ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120804ca555eSLois Curfman McInnes } 1209515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 121004ca555eSLois Curfman McInnes } 1211515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 121204ca555eSLois Curfman McInnes } 121304ca555eSLois Curfman McInnes else { 1214515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 121504ca555eSLois Curfman McInnes } 121637fa93a5SLois Curfman McInnes } 1217855ac2c5SLois Curfman McInnes return 0; 1218855ac2c5SLois Curfman McInnes } 1219855ac2c5SLois Curfman McInnes 12200de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221b7c46309SBarry Smith { 1222b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1225416022c9SBarry Smith Mat B; 1226b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227b7c46309SBarry Smith Scalar *array; 1228b7c46309SBarry Smith 1229416022c9SBarry Smith if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1230b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1231b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1232b7c46309SBarry Smith 1233b7c46309SBarry Smith /* copy over the A part */ 1234ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1235b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1236b7c46309SBarry Smith row = a->rstart; 1237dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1238b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1239416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1240b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1241b7c46309SBarry Smith } 1242b7c46309SBarry Smith aj = Aloc->j; 1243dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1244b7c46309SBarry Smith 1245b7c46309SBarry Smith /* copy over the B part */ 1246ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1247b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1248b7c46309SBarry Smith row = a->rstart; 12490452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1250dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1251b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1252416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1253b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1254b7c46309SBarry Smith } 12550452661fSBarry Smith PetscFree(ct); 1256b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1257b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12580de55854SLois Curfman McInnes if (matout) { 12590de55854SLois Curfman McInnes *matout = B; 12600de55854SLois Curfman McInnes } else { 12610de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12620452661fSBarry Smith PetscFree(a->rowners); 12630de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12640de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12650452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12660452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12670de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1268a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12690452661fSBarry Smith PetscFree(a); 1270416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12710452661fSBarry Smith PetscHeaderDestroy(B); 12720de55854SLois Curfman McInnes } 1273b7c46309SBarry Smith return 0; 1274b7c46309SBarry Smith } 1275b7c46309SBarry Smith 1276682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1277682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1278682d7d0cSBarry Smith { 1279682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1280682d7d0cSBarry Smith 1281682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1282682d7d0cSBarry Smith else return 0; 1283682d7d0cSBarry Smith } 1284682d7d0cSBarry Smith 1285fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12863d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12872f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 12888a729477SBarry Smith /* -------------------------------------------------------------------*/ 12892ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 129039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 129144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 129244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1293299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1294299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1295299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 129644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1297b7c46309SBarry Smith MatTranspose_MPIAIJ, 1298a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1299855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1300ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13011eb62cbbSBarry Smith 0, 1302299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1303299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1304299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1305d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1306299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13073d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1308b49de8d1SLois Curfman McInnes 0,0,0, 13097fab95ffSSatish Balay 0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1310052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1311052efed2SBarry Smith MatScale_MPIAIJ}; 13128a729477SBarry Smith 13131987afe7SBarry Smith /*@C 1314ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1315ff756334SLois Curfman McInnes (the default parallel PETSc format). 13168a729477SBarry Smith 13178a729477SBarry Smith Input Parameters: 13181eb62cbbSBarry Smith . comm - MPI communicator 13197d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13207d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13217d3e4905SLois Curfman McInnes if N is given) 13227d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13237d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13247d3e4905SLois Curfman McInnes if n is given) 1325ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1326ff756334SLois Curfman McInnes (same for all local rows) 1327ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1328ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1329ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1330ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1331ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1332ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1333ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13348a729477SBarry Smith 1335ff756334SLois Curfman McInnes Output Parameter: 13368a729477SBarry Smith . newmat - the matrix 13378a729477SBarry Smith 1338ff756334SLois Curfman McInnes Notes: 1339ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1340ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13410002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13420002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1343ff756334SLois Curfman McInnes 1344ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1345ff756334SLois Curfman McInnes (possibly both). 1346ff756334SLois Curfman McInnes 1347e0245417SLois Curfman McInnes Storage Information: 1348e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1349e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1350e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1351e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1352e0245417SLois Curfman McInnes 1353e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13545ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13555ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13565ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13575ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1358ff756334SLois Curfman McInnes 1359*2191d07cSBarry Smith $ Consider a processor that owns rows 3, 4 and 5 1360*2191d07cSBarry Smith $ 1361*2191d07cSBarry Smith $ row col = 0 1 2 3 4 5 6 7 8 9 10 11 1362*2191d07cSBarry Smith $ 3 o o o d d d o o o o o o 1363*2191d07cSBarry Smith $ 4 o o o d d d o o o o o o 1364*2191d07cSBarry Smith $ 5 o o o d d d o o o o o o 1365*2191d07cSBarry Smith $ 1366*2191d07cSBarry Smith $ Thus any entries in the d locations are stored in the d matrix and 1367*2191d07cSBarry Smith $ any in the o locations are store in the o matrix. (The d and the o matrix 1368*2191d07cSBarry Smith $ are simply SeqAIJ matrix (that is they store the matrices in compress 1369*2191d07cSBarry Smith $ row storage)). 1370*2191d07cSBarry Smith 1371*2191d07cSBarry Smith $ Now d_nz should give the number of non zeros per row in the d matrix 1372*2191d07cSBarry Smith $ and o_nz should give the number of nonzeros per row in the o matrix. 1373*2191d07cSBarry Smith $ In general for PDE problems where most nonzeros are near the diagonal 1374*2191d07cSBarry Smith $ one expects d_nz >> o_nz 1375*2191d07cSBarry Smith 1376c451ab25SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 1377c451ab25SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 1378c451ab25SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 1379c451ab25SLois Curfman McInnes 1380c451ab25SLois Curfman McInnes Options Database Keys: 1381c451ab25SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 1382c451ab25SLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1383c451ab25SLois Curfman McInnes 1384dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1385ff756334SLois Curfman McInnes 1386fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13878a729477SBarry Smith @*/ 13881eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13891eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 13908a729477SBarry Smith { 13918a729477SBarry Smith Mat mat; 1392416022c9SBarry Smith Mat_MPIAIJ *a; 13936abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1394416022c9SBarry Smith 13958a729477SBarry Smith *newmat = 0; 13960452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1397a5a9c739SBarry Smith PLogObjectCreate(mat); 13980452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1399416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 140044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 140144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 14028a729477SBarry Smith mat->factor = 0; 1403c456f294SBarry Smith mat->assembled = PETSC_FALSE; 1404d6dfbf8fSBarry Smith 140564119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 140617699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 140717699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 14081eb62cbbSBarry Smith 1409b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14101987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 14111987afe7SBarry Smith 1412dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14131eb62cbbSBarry Smith work[0] = m; work[1] = n; 1414d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1415dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1416dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14171eb62cbbSBarry Smith } 141817699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 141917699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1420416022c9SBarry Smith a->m = m; 1421416022c9SBarry Smith a->n = n; 1422416022c9SBarry Smith a->N = N; 1423416022c9SBarry Smith a->M = M; 14241eb62cbbSBarry Smith 14251eb62cbbSBarry Smith /* build local table of row and column ownerships */ 14260452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1427579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 142817699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1429416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1430416022c9SBarry Smith a->rowners[0] = 0; 143117699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1432416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 14338a729477SBarry Smith } 143417699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 143517699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1436416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1437416022c9SBarry Smith a->cowners[0] = 0; 143817699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1439416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14408a729477SBarry Smith } 144117699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 144217699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14438a729477SBarry Smith 14445ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1445416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1446416022c9SBarry Smith PLogObjectParent(mat,a->A); 14477b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1448416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1449416022c9SBarry Smith PLogObjectParent(mat,a->B); 14508a729477SBarry Smith 14511eb62cbbSBarry Smith /* build cache for off array entries formed */ 1452416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1453416022c9SBarry Smith a->colmap = 0; 1454416022c9SBarry Smith a->garray = 0; 14554b0e389bSBarry Smith a->roworiented = 1; 14568a729477SBarry Smith 14571eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1458416022c9SBarry Smith a->lvec = 0; 1459416022c9SBarry Smith a->Mvctx = 0; 14608a729477SBarry Smith 1461d6dfbf8fSBarry Smith *newmat = mat; 1462d6dfbf8fSBarry Smith return 0; 1463d6dfbf8fSBarry Smith } 1464c74985f6SBarry Smith 14653d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1466d6dfbf8fSBarry Smith { 1467d6dfbf8fSBarry Smith Mat mat; 1468416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 146925cdf11fSBarry Smith int ierr, len,flg; 1470d6dfbf8fSBarry Smith 1471416022c9SBarry Smith *newmat = 0; 14720452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1473a5a9c739SBarry Smith PLogObjectCreate(mat); 14740452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1475416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 147644a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 147744a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1478d6dfbf8fSBarry Smith mat->factor = matin->factor; 1479c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1480d6dfbf8fSBarry Smith 1481416022c9SBarry Smith a->m = oldmat->m; 1482416022c9SBarry Smith a->n = oldmat->n; 1483416022c9SBarry Smith a->M = oldmat->M; 1484416022c9SBarry Smith a->N = oldmat->N; 1485d6dfbf8fSBarry Smith 1486416022c9SBarry Smith a->rstart = oldmat->rstart; 1487416022c9SBarry Smith a->rend = oldmat->rend; 1488416022c9SBarry Smith a->cstart = oldmat->cstart; 1489416022c9SBarry Smith a->cend = oldmat->cend; 149017699dbbSLois Curfman McInnes a->size = oldmat->size; 149117699dbbSLois Curfman McInnes a->rank = oldmat->rank; 149264119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1493d6dfbf8fSBarry Smith 14940452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 149517699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 149617699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1497416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14982ee70a88SLois Curfman McInnes if (oldmat->colmap) { 14990452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1500416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1501416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1502416022c9SBarry Smith } else a->colmap = 0; 1503ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 15040452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1505464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1506416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1507416022c9SBarry Smith } else a->garray = 0; 1508d6dfbf8fSBarry Smith 1509416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1510416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1511a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1512416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1513416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1514416022c9SBarry Smith PLogObjectParent(mat,a->A); 1515416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1516416022c9SBarry Smith PLogObjectParent(mat,a->B); 15175dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 151825cdf11fSBarry Smith if (flg) { 1519682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1520682d7d0cSBarry Smith } 15218a729477SBarry Smith *newmat = mat; 15228a729477SBarry Smith return 0; 15238a729477SBarry Smith } 1524416022c9SBarry Smith 1525416022c9SBarry Smith #include "sysio.h" 1526416022c9SBarry Smith 1527c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1528416022c9SBarry Smith { 1529d65a2f8fSBarry Smith Mat A; 1530416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1531d65a2f8fSBarry Smith Scalar *vals,*svals; 1532416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1533416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1534416022c9SBarry Smith MPI_Status status; 153517699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1536d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1537d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1538416022c9SBarry Smith 153917699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 154017699dbbSLois Curfman McInnes if (!rank) { 1541416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1542416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1543c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1544416022c9SBarry Smith } 1545416022c9SBarry Smith 1546416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1547416022c9SBarry Smith M = header[1]; N = header[2]; 1548416022c9SBarry Smith /* determine ownership of all rows */ 154917699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15500452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1551416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1552416022c9SBarry Smith rowners[0] = 0; 155317699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1554416022c9SBarry Smith rowners[i] += rowners[i-1]; 1555416022c9SBarry Smith } 155617699dbbSLois Curfman McInnes rstart = rowners[rank]; 155717699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1558416022c9SBarry Smith 1559416022c9SBarry Smith /* distribute row lengths to all processors */ 15600452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1561416022c9SBarry Smith offlens = ourlens + (rend-rstart); 156217699dbbSLois Curfman McInnes if (!rank) { 15630452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1564416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15650452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 156617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1567d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15680452661fSBarry Smith PetscFree(sndcounts); 1569416022c9SBarry Smith } 1570416022c9SBarry Smith else { 1571416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1572416022c9SBarry Smith } 1573416022c9SBarry Smith 157417699dbbSLois Curfman McInnes if (!rank) { 1575416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15760452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1577cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 157817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1579416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1580416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1581416022c9SBarry Smith } 1582416022c9SBarry Smith } 15830452661fSBarry Smith PetscFree(rowlengths); 1584416022c9SBarry Smith 1585416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1586416022c9SBarry Smith maxnz = 0; 158717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15880452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1589416022c9SBarry Smith } 15900452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1591416022c9SBarry Smith 1592416022c9SBarry Smith /* read in my part of the matrix column indices */ 1593416022c9SBarry Smith nz = procsnz[0]; 15940452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1595d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1596d65a2f8fSBarry Smith 1597d65a2f8fSBarry Smith /* read in every one elses and ship off */ 159817699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1599d65a2f8fSBarry Smith nz = procsnz[i]; 1600416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1601d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1602d65a2f8fSBarry Smith } 16030452661fSBarry Smith PetscFree(cols); 1604416022c9SBarry Smith } 1605416022c9SBarry Smith else { 1606416022c9SBarry Smith /* determine buffer space needed for message */ 1607416022c9SBarry Smith nz = 0; 1608416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1609416022c9SBarry Smith nz += ourlens[i]; 1610416022c9SBarry Smith } 16110452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1612416022c9SBarry Smith 1613416022c9SBarry Smith /* receive message of column indices*/ 1614d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1615416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1616c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1617416022c9SBarry Smith } 1618416022c9SBarry Smith 1619416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1620cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1621416022c9SBarry Smith jj = 0; 1622416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1623416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1624d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1625416022c9SBarry Smith jj++; 1626416022c9SBarry Smith } 1627416022c9SBarry Smith } 1628d65a2f8fSBarry Smith 1629d65a2f8fSBarry Smith /* create our matrix */ 1630416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1631416022c9SBarry Smith ourlens[i] -= offlens[i]; 1632416022c9SBarry Smith } 1633d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1634d65a2f8fSBarry Smith A = *newmat; 1635d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1636d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1637d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1638d65a2f8fSBarry Smith } 1639416022c9SBarry Smith 164017699dbbSLois Curfman McInnes if (!rank) { 16410452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1642416022c9SBarry Smith 1643416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1644416022c9SBarry Smith nz = procsnz[0]; 1645416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1646d65a2f8fSBarry Smith 1647d65a2f8fSBarry Smith /* insert into matrix */ 1648d65a2f8fSBarry Smith jj = rstart; 1649d65a2f8fSBarry Smith smycols = mycols; 1650d65a2f8fSBarry Smith svals = vals; 1651d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1652d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1653d65a2f8fSBarry Smith smycols += ourlens[i]; 1654d65a2f8fSBarry Smith svals += ourlens[i]; 1655d65a2f8fSBarry Smith jj++; 1656416022c9SBarry Smith } 1657416022c9SBarry Smith 1658d65a2f8fSBarry Smith /* read in other processors and ship out */ 165917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1660416022c9SBarry Smith nz = procsnz[i]; 1661416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1662d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1663416022c9SBarry Smith } 16640452661fSBarry Smith PetscFree(procsnz); 1665416022c9SBarry Smith } 1666d65a2f8fSBarry Smith else { 1667d65a2f8fSBarry Smith /* receive numeric values */ 16680452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1669416022c9SBarry Smith 1670d65a2f8fSBarry Smith /* receive message of values*/ 1671d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1672d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1673c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1674d65a2f8fSBarry Smith 1675d65a2f8fSBarry Smith /* insert into matrix */ 1676d65a2f8fSBarry Smith jj = rstart; 1677d65a2f8fSBarry Smith smycols = mycols; 1678d65a2f8fSBarry Smith svals = vals; 1679d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1680d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1681d65a2f8fSBarry Smith smycols += ourlens[i]; 1682d65a2f8fSBarry Smith svals += ourlens[i]; 1683d65a2f8fSBarry Smith jj++; 1684d65a2f8fSBarry Smith } 1685d65a2f8fSBarry Smith } 16860452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1687d65a2f8fSBarry Smith 1688d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1689d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1690416022c9SBarry Smith return 0; 1691416022c9SBarry Smith } 1692