1cb512458SBarry Smith #ifndef lint 2*3a511b96SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.123 1996/02/07 23:13:36 balay Exp curfman $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 117b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118b49de8d1SLois Curfman McInnes } 119b49de8d1SLois Curfman McInnes } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes else { 122b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes return 0; 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1341eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1356abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1361eb62cbbSBarry Smith InsertMode addv; 1371eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 140d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 142bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1451eb62cbbSBarry Smith 1461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1490452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1501eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1511eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1531eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1541eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1558a729477SBarry Smith } 1568a729477SBarry Smith } 1578a729477SBarry Smith } 15817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1610452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16317699dbbSLois Curfman McInnes nreceives = work[rank]; 16417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16517699dbbSLois Curfman McInnes nmax = work[rank]; 1660452661fSBarry Smith PetscFree(work); 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith /* post receives: 1691eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1701eb62cbbSBarry Smith (global index,value) we store the global index as a double 171d6dfbf8fSBarry Smith to simplify the message passing. 1721eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1731eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1741eb62cbbSBarry Smith this is a lot of wasted space. 1751eb62cbbSBarry Smith 1761eb62cbbSBarry Smith 1771eb62cbbSBarry Smith This could be done better. 1781eb62cbbSBarry Smith */ 1790452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18078b31e54SBarry Smith CHKPTRQ(rvalues); 1810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18278b31e54SBarry Smith CHKPTRQ(recv_waits); 1831eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 184416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1851eb62cbbSBarry Smith comm,recv_waits+i); 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* do sends: 1891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1901eb62cbbSBarry Smith the ith processor 1911eb62cbbSBarry Smith */ 1920452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19478b31e54SBarry Smith CHKPTRQ(send_waits); 1950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1961eb62cbbSBarry Smith starts[0] = 0; 19717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2021eb62cbbSBarry Smith } 2030452661fSBarry Smith PetscFree(owner); 2041eb62cbbSBarry Smith starts[0] = 0; 20517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2061eb62cbbSBarry Smith count = 0; 20717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2081eb62cbbSBarry Smith if (procs[i]) { 209416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2101eb62cbbSBarry Smith comm,send_waits+count++); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith } 2130452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* Free cache space */ 2162191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 21778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2181eb62cbbSBarry Smith 2191eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2201eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2211eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2221eb62cbbSBarry Smith aij->rmax = nmax; 2231eb62cbbSBarry Smith 2241eb62cbbSBarry Smith return 0; 2251eb62cbbSBarry Smith } 22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2271eb62cbbSBarry Smith 228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2291eb62cbbSBarry Smith { 23044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2321eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 233416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2351eb62cbbSBarry Smith Scalar *values,val; 2361eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2371eb62cbbSBarry Smith 2381eb62cbbSBarry Smith /* wait on receives */ 2391eb62cbbSBarry Smith while (count) { 240d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2411eb62cbbSBarry Smith /* unpack receives into our local space */ 242d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 243c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2441eb62cbbSBarry Smith n = n/3; 2451eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 246227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 247227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2481eb62cbbSBarry Smith val = values[3*i+2]; 2491eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2501eb62cbbSBarry Smith col -= aij->cstart; 2511eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2521eb62cbbSBarry Smith } 2531eb62cbbSBarry Smith else { 254227d817aSBarry Smith if (mat->was_assembled) { 255b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256dbb450caSBarry Smith col = aij->colmap[col] + shift; 257ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2582493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 260d6dfbf8fSBarry Smith } 2619e25ed09SBarry Smith } 2621eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith } 2651eb62cbbSBarry Smith count--; 2661eb62cbbSBarry Smith } 2670452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2681eb62cbbSBarry Smith 2691eb62cbbSBarry Smith /* wait on sends */ 2701eb62cbbSBarry Smith if (aij->nsends) { 2710452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27278b31e54SBarry Smith CHKPTRQ(send_status); 2731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2740452661fSBarry Smith PetscFree(send_status); 2751eb62cbbSBarry Smith } 2760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2771eb62cbbSBarry Smith 27864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 27978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2811eb62cbbSBarry Smith 2822493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2832493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 284227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2862493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2872493cbb0SBarry Smith } 2882493cbb0SBarry Smith 289227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 29078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2915e42470aSBarry Smith } 29278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2945e42470aSBarry Smith 2958a729477SBarry Smith return 0; 2968a729477SBarry Smith } 2978a729477SBarry Smith 2982ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2991eb62cbbSBarry Smith { 30044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301dbd7a890SLois Curfman McInnes int ierr; 30278b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30378b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3041eb62cbbSBarry Smith return 0; 3051eb62cbbSBarry Smith } 3061eb62cbbSBarry Smith 3071eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3081eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3091eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3101eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3111eb62cbbSBarry Smith routine. 3121eb62cbbSBarry Smith */ 31344a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3141eb62cbbSBarry Smith { 31544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 31617699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3176abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 31817699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3195392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 320d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 321d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3221eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3231eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3241eb62cbbSBarry Smith IS istmp; 3251eb62cbbSBarry Smith 32678b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 32778b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3281eb62cbbSBarry Smith 3291eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3300452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3320452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3331eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3341eb62cbbSBarry Smith idx = rows[i]; 3351eb62cbbSBarry Smith found = 0; 33617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3371eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3381eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3391eb62cbbSBarry Smith } 3401eb62cbbSBarry Smith } 341bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3421eb62cbbSBarry Smith } 34317699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3441eb62cbbSBarry Smith 3451eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3460452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 34717699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34817699dbbSLois Curfman McInnes nrecvs = work[rank]; 34917699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35017699dbbSLois Curfman McInnes nmax = work[rank]; 3510452661fSBarry Smith PetscFree(work); 3521eb62cbbSBarry Smith 3531eb62cbbSBarry Smith /* post receives: */ 3540452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 35578b31e54SBarry Smith CHKPTRQ(rvalues); 3560452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 35778b31e54SBarry Smith CHKPTRQ(recv_waits); 3581eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 359416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3601eb62cbbSBarry Smith } 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* do sends: 3631eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3641eb62cbbSBarry Smith the ith processor 3651eb62cbbSBarry Smith */ 3660452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3670452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36878b31e54SBarry Smith CHKPTRQ(send_waits); 3690452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3701eb62cbbSBarry Smith starts[0] = 0; 37117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3721eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3731eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3741eb62cbbSBarry Smith } 3751eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith starts[0] = 0; 37817699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3791eb62cbbSBarry Smith count = 0; 38017699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3811eb62cbbSBarry Smith if (procs[i]) { 382416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3831eb62cbbSBarry Smith } 3841eb62cbbSBarry Smith } 3850452661fSBarry Smith PetscFree(starts); 3861eb62cbbSBarry Smith 38717699dbbSLois Curfman McInnes base = owners[rank]; 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* wait on receives */ 3900452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3911eb62cbbSBarry Smith source = lens + nrecvs; 3921eb62cbbSBarry Smith count = nrecvs; slen = 0; 3931eb62cbbSBarry Smith while (count) { 394d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3951eb62cbbSBarry Smith /* unpack receives into our local space */ 3961eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 397d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 398d6dfbf8fSBarry Smith lens[imdex] = n; 3991eb62cbbSBarry Smith slen += n; 4001eb62cbbSBarry Smith count--; 4011eb62cbbSBarry Smith } 4020452661fSBarry Smith PetscFree(recv_waits); 4031eb62cbbSBarry Smith 4041eb62cbbSBarry Smith /* move the data into the send scatter */ 4050452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4061eb62cbbSBarry Smith count = 0; 4071eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4081eb62cbbSBarry Smith values = rvalues + i*nmax; 4091eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4101eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4111eb62cbbSBarry Smith } 4121eb62cbbSBarry Smith } 4130452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4140452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4151eb62cbbSBarry Smith 4161eb62cbbSBarry Smith /* actually zap the local rows */ 417416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418464493b3SBarry Smith PLogObjectParent(A,istmp); 4190452661fSBarry Smith PetscFree(lrows); 42078b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42178b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42278b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4231eb62cbbSBarry Smith 4241eb62cbbSBarry Smith /* wait on sends */ 4251eb62cbbSBarry Smith if (nsends) { 4260452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 42778b31e54SBarry Smith CHKPTRQ(send_status); 4281eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4290452661fSBarry Smith PetscFree(send_status); 4301eb62cbbSBarry Smith } 4310452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4321eb62cbbSBarry Smith 4331eb62cbbSBarry Smith return 0; 4341eb62cbbSBarry Smith } 4351eb62cbbSBarry Smith 436416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4371eb62cbbSBarry Smith { 438416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4391eb62cbbSBarry Smith int ierr; 440416022c9SBarry Smith 44164119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 44364119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4451eb62cbbSBarry Smith return 0; 4461eb62cbbSBarry Smith } 4471eb62cbbSBarry Smith 448416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449da3a660dSBarry Smith { 450416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451da3a660dSBarry Smith int ierr; 45264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 45464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 459416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460da3a660dSBarry Smith { 461416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 464da3a660dSBarry Smith /* do nondiagonal part */ 465416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466da3a660dSBarry Smith /* send it on its way */ 467416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469da3a660dSBarry Smith /* do local part */ 470416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 472da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 473da3a660dSBarry Smith /* but is not perhaps always true. */ 474416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476da3a660dSBarry Smith return 0; 477da3a660dSBarry Smith } 478da3a660dSBarry Smith 479416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480da3a660dSBarry Smith { 481416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482da3a660dSBarry Smith int ierr; 483da3a660dSBarry Smith 484da3a660dSBarry Smith /* do nondiagonal part */ 485416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486da3a660dSBarry Smith /* send it on its way */ 487416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489da3a660dSBarry Smith /* do local part */ 490416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 492da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 493da3a660dSBarry Smith /* but is not perhaps always true. */ 494416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49564119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496da3a660dSBarry Smith return 0; 497da3a660dSBarry Smith } 498da3a660dSBarry Smith 4991eb62cbbSBarry Smith /* 5001eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5011eb62cbbSBarry Smith diagonal block 5021eb62cbbSBarry Smith */ 503416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5041eb62cbbSBarry Smith { 505416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5071eb62cbbSBarry Smith } 5081eb62cbbSBarry Smith 509052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510052efed2SBarry Smith { 511052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512052efed2SBarry Smith int ierr; 513052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515052efed2SBarry Smith return 0; 516052efed2SBarry Smith } 517052efed2SBarry Smith 51844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5191eb62cbbSBarry Smith { 5201eb62cbbSBarry Smith Mat mat = (Mat) obj; 52144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5221eb62cbbSBarry Smith int ierr; 523a5a9c739SBarry Smith #if defined(PETSC_LOG) 5246e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525a5a9c739SBarry Smith #endif 5260452661fSBarry Smith PetscFree(aij->rowners); 52778b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 52878b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5290452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5300452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5311eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 532a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5330452661fSBarry Smith PetscFree(aij); 534a5a9c739SBarry Smith PLogObjectDestroy(mat); 5350452661fSBarry Smith PetscHeaderDestroy(mat); 5361eb62cbbSBarry Smith return 0; 5371eb62cbbSBarry Smith } 5387857610eSBarry Smith #include "draw.h" 539b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 540ee50ffe9SBarry Smith 541416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5421eb62cbbSBarry Smith { 543416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544416022c9SBarry Smith int ierr; 545416022c9SBarry Smith 54617699dbbSLois Curfman McInnes if (aij->size == 1) { 547416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548416022c9SBarry Smith } 549a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550416022c9SBarry Smith return 0; 551416022c9SBarry Smith } 552416022c9SBarry Smith 553d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554416022c9SBarry Smith { 55544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 558d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 559d636dbe3SBarry Smith FILE *fd; 560416022c9SBarry Smith 561416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 56308480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 564a56f8943SBarry Smith int nz,nzalloc,mem; 565a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 566227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 569a56f8943SBarry Smith fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570a56f8943SBarry Smith nzalloc,mem); 57108480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 57208480c60SBarry Smith fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 57308480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 57408480c60SBarry Smith fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575a56f8943SBarry Smith fflush(fd); 576a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 577a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 578416022c9SBarry Smith return 0; 579416022c9SBarry Smith } 58008480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 58108480c60SBarry Smith return 0; 58208480c60SBarry Smith } 583416022c9SBarry Smith } 584416022c9SBarry Smith 585416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 586227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 5877f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 588d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 58917699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5901eb62cbbSBarry Smith aij->cend); 59178b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 59278b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593d13ab20cSBarry Smith fflush(fd); 5947f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 595d13ab20cSBarry Smith } 596416022c9SBarry Smith else { 597a56f8943SBarry Smith int size = aij->size; 598a56f8943SBarry Smith rank = aij->rank; 59917699dbbSLois Curfman McInnes if (size == 1) { 60078b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 60195373324SBarry Smith } 60295373324SBarry Smith else { 60395373324SBarry Smith /* assemble the entire matrix onto first processor. */ 60495373324SBarry Smith Mat A; 605ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6062eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 60795373324SBarry Smith Scalar *a; 6082ee70a88SLois Curfman McInnes 60917699dbbSLois Curfman McInnes if (!rank) { 610b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61295373324SBarry Smith } 61395373324SBarry Smith else { 614b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61695373324SBarry Smith } 617464493b3SBarry Smith PLogObjectParent(mat,A); 618416022c9SBarry Smith 61995373324SBarry Smith /* copy over the A part */ 620ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6212ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62295373324SBarry Smith row = aij->rstart; 623dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 62495373324SBarry Smith for ( i=0; i<m; i++ ) { 625416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 62695373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 62795373324SBarry Smith } 6282ee70a88SLois Curfman McInnes aj = Aloc->j; 629dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 63095373324SBarry Smith 63195373324SBarry Smith /* copy over the B part */ 632ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6332ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 63495373324SBarry Smith row = aij->rstart; 6350452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 63795373324SBarry Smith for ( i=0; i<m; i++ ) { 638416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 63995373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 64095373324SBarry Smith } 6410452661fSBarry Smith PetscFree(ct); 64278b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64378b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64417699dbbSLois Curfman McInnes if (!rank) { 64578b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 64695373324SBarry Smith } 64778b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 64895373324SBarry Smith } 64995373324SBarry Smith } 6501eb62cbbSBarry Smith return 0; 6511eb62cbbSBarry Smith } 6521eb62cbbSBarry Smith 653416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654416022c9SBarry Smith { 655416022c9SBarry Smith Mat mat = (Mat) obj; 656416022c9SBarry Smith int ierr; 657416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 658416022c9SBarry Smith 659416022c9SBarry Smith if (!viewer) { 660416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661416022c9SBarry Smith } 662416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665416022c9SBarry Smith } 666416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668d7e8b826SBarry Smith } 669d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671416022c9SBarry Smith } 672416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 673d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674416022c9SBarry Smith } 675416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 676416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 677416022c9SBarry Smith } 678416022c9SBarry Smith return 0; 679416022c9SBarry Smith } 680416022c9SBarry Smith 681ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6821eb62cbbSBarry Smith /* 6831eb62cbbSBarry Smith This has to provide several versions. 6841eb62cbbSBarry Smith 6851eb62cbbSBarry Smith 1) per sequential 6861eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6871eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 688d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6891eb62cbbSBarry Smith */ 690fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691dbb450caSBarry Smith double fshift,int its,Vec xx) 6928a729477SBarry Smith { 69344a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 695ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6966abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6976abc6512SBarry Smith int ierr,*idx, *diag; 698416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 699da3a660dSBarry Smith Vec tt; 7008a729477SBarry Smith 701d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 703dbb450caSBarry Smith ls = ls + shift; 704ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705d6dfbf8fSBarry Smith diag = A->diag; 706acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 70748d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708acb40c82SBarry Smith } 709da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 710da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 711da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 712da3a660dSBarry Smith 713da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 714da3a660dSBarry Smith 715da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 716da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 717da3a660dSBarry Smith is the relaxation factor. 718da3a660dSBarry Smith */ 71978b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720464493b3SBarry Smith PLogObjectParent(matin,tt); 721da3a660dSBarry Smith VecGetArray(tt,&t); 722da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 723da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 724da3a660dSBarry Smith VecSet(&zero,mat->lvec); 72564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 72678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 727da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 728da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 729dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 730dbb450caSBarry Smith v = A->a + diag[i] + !shift; 731da3a660dSBarry Smith sum = b[i]; 732da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 733dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 734da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 735dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 736dbb450caSBarry Smith v = B->a + B->i[i] + shift; 737da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 738da3a660dSBarry Smith x[i] = omega*(sum/d); 739da3a660dSBarry Smith } 74064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 742da3a660dSBarry Smith 743da3a660dSBarry Smith /* t = b - (2*E - D)x */ 744da3a660dSBarry Smith v = A->a; 745dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746da3a660dSBarry Smith 747da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 748dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749da3a660dSBarry Smith diag = A->diag; 750da3a660dSBarry Smith VecSet(&zero,mat->lvec); 75164119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 753da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 754da3a660dSBarry Smith n = diag[i] - A->i[i]; 755dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 756dbb450caSBarry Smith v = A->a + A->i[i] + shift; 757da3a660dSBarry Smith sum = t[i]; 758da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 759dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 760da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 761dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 762dbb450caSBarry Smith v = B->a + B->i[i] + shift; 763da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 764da3a660dSBarry Smith t[i] = omega*(sum/d); 765da3a660dSBarry Smith } 76664119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768da3a660dSBarry Smith /* x = x + t */ 769da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770da3a660dSBarry Smith VecDestroy(tt); 771da3a660dSBarry Smith return 0; 772da3a660dSBarry Smith } 773da3a660dSBarry Smith 7741eb62cbbSBarry Smith 775d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 777da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778da3a660dSBarry Smith } 779da3a660dSBarry Smith else { 78064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78178b31e54SBarry Smith CHKERRQ(ierr); 78264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78378b31e54SBarry Smith CHKERRQ(ierr); 784da3a660dSBarry Smith } 785d6dfbf8fSBarry Smith while (its--) { 786d6dfbf8fSBarry Smith /* go down through the rows */ 78764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 789d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 790d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 791dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 792dbb450caSBarry Smith v = A->a + A->i[i] + shift; 793d6dfbf8fSBarry Smith sum = b[i]; 794d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 795dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 796d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 797dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 798dbb450caSBarry Smith v = B->a + B->i[i] + shift; 799d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 800d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801d6dfbf8fSBarry Smith } 80264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 804d6dfbf8fSBarry Smith /* come up through the rows */ 80564119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 80678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 807d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 808d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 809dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 810dbb450caSBarry Smith v = A->a + A->i[i] + shift; 811d6dfbf8fSBarry Smith sum = b[i]; 812d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 813dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 814d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 815dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 816dbb450caSBarry Smith v = B->a + B->i[i] + shift; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 818d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819d6dfbf8fSBarry Smith } 82064119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 822d6dfbf8fSBarry Smith } 823d6dfbf8fSBarry Smith } 824d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 825da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 826da3a660dSBarry Smith VecSet(&zero,mat->lvec); 82764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 830da3a660dSBarry Smith n = diag[i] - A->i[i]; 831dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 832dbb450caSBarry Smith v = A->a + A->i[i] + shift; 833da3a660dSBarry Smith sum = b[i]; 834da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 835dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 836da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 837dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 838dbb450caSBarry Smith v = B->a + B->i[i] + shift; 839da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 840da3a660dSBarry Smith x[i] = omega*(sum/d); 841da3a660dSBarry Smith } 84264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 844da3a660dSBarry Smith its--; 845da3a660dSBarry Smith } 846d6dfbf8fSBarry Smith while (its--) { 84764119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 84878b31e54SBarry Smith CHKERRQ(ierr); 84964119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 85078b31e54SBarry Smith CHKERRQ(ierr); 85164119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 853d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 854d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 855dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 856dbb450caSBarry Smith v = A->a + A->i[i] + shift; 857d6dfbf8fSBarry Smith sum = b[i]; 858d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 859dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 860d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 861dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 862dbb450caSBarry Smith v = B->a + B->i[i] + shift; 863d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 864d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865d6dfbf8fSBarry Smith } 86664119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 86778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith VecSet(&zero,mat->lvec); 87364119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 87478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 875da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 876da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 877dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 878dbb450caSBarry Smith v = A->a + diag[i] + !shift; 879da3a660dSBarry Smith sum = b[i]; 880da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 881dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 882da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 883dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 884dbb450caSBarry Smith v = B->a + B->i[i] + shift; 885da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 886da3a660dSBarry Smith x[i] = omega*(sum/d); 887da3a660dSBarry Smith } 88864119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 890da3a660dSBarry Smith its--; 891da3a660dSBarry Smith } 892d6dfbf8fSBarry Smith while (its--) { 89364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 89764119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 899d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 900d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 901dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 902dbb450caSBarry Smith v = A->a + A->i[i] + shift; 903d6dfbf8fSBarry Smith sum = b[i]; 904d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 905dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 906d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 907dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 908dbb450caSBarry Smith v = B->a + B->i[i] + shift; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 910d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911d6dfbf8fSBarry Smith } 91264119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 914d6dfbf8fSBarry Smith } 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 918dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919da3a660dSBarry Smith } 92064119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92178b31e54SBarry Smith CHKERRQ(ierr); 92264119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92378b31e54SBarry Smith CHKERRQ(ierr); 924d6dfbf8fSBarry Smith while (its--) { 925d6dfbf8fSBarry Smith /* go down through the rows */ 926d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 927d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 928dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 929dbb450caSBarry Smith v = A->a + A->i[i] + shift; 930d6dfbf8fSBarry Smith sum = b[i]; 931d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 932dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 933d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 934dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 935dbb450caSBarry Smith v = B->a + B->i[i] + shift; 936d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 937d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938d6dfbf8fSBarry Smith } 939d6dfbf8fSBarry Smith /* come up through the rows */ 940d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 941d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 942dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 943dbb450caSBarry Smith v = A->a + A->i[i] + shift; 944d6dfbf8fSBarry Smith sum = b[i]; 945d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 946dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 947d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 948dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 949dbb450caSBarry Smith v = B->a + B->i[i] + shift; 950d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 951d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 952d6dfbf8fSBarry Smith } 953d6dfbf8fSBarry Smith } 954d6dfbf8fSBarry Smith } 955d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 957dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958da3a660dSBarry Smith } 95964119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96078b31e54SBarry Smith CHKERRQ(ierr); 96164119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96278b31e54SBarry Smith CHKERRQ(ierr); 963d6dfbf8fSBarry Smith while (its--) { 964d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 965d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 966dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 967dbb450caSBarry Smith v = A->a + A->i[i] + shift; 968d6dfbf8fSBarry Smith sum = b[i]; 969d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 970dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 971d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 972dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 973dbb450caSBarry Smith v = B->a + B->i[i] + shift; 974d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 975d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976d6dfbf8fSBarry Smith } 977d6dfbf8fSBarry Smith } 978d6dfbf8fSBarry Smith } 979d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 981dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982da3a660dSBarry Smith } 98364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 98564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 987d6dfbf8fSBarry Smith while (its--) { 988d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 989d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 990dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 991dbb450caSBarry Smith v = A->a + A->i[i] + shift; 992d6dfbf8fSBarry Smith sum = b[i]; 993d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 994dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 995d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 996dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 997dbb450caSBarry Smith v = B->a + B->i[i] + shift; 998d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 999d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000d6dfbf8fSBarry Smith } 1001d6dfbf8fSBarry Smith } 1002d6dfbf8fSBarry Smith } 10038a729477SBarry Smith return 0; 10048a729477SBarry Smith } 1005a66be287SLois Curfman McInnes 1006fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1008a66be287SLois Curfman McInnes { 1009a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1011a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012a66be287SLois Curfman McInnes 101378b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 101478b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1017a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1019d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1022d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024a66be287SLois Curfman McInnes } 1025a66be287SLois Curfman McInnes return 0; 1026a66be287SLois Curfman McInnes } 1027a66be287SLois Curfman McInnes 1028299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036299609e3SLois Curfman McInnes 1037416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038c74985f6SBarry Smith { 1039c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040c74985f6SBarry Smith 1041c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1042c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1043c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1044c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1045c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1046c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1047c74985f6SBarry Smith } 1048c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1049c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1050c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1052c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10534b0e389bSBarry Smith else if (op == COLUMN_ORIENTED) { 10544b0e389bSBarry Smith a->roworiented = 0; 10554b0e389bSBarry Smith MatSetOption(a->A,op); 10564b0e389bSBarry Smith MatSetOption(a->B,op); 10574b0e389bSBarry Smith } 1058c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10594d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060c0bbcb79SLois Curfman McInnes else 10614d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062c74985f6SBarry Smith return 0; 1063c74985f6SBarry Smith } 1064c74985f6SBarry Smith 1065d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066c74985f6SBarry Smith { 106744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1069c74985f6SBarry Smith return 0; 1070c74985f6SBarry Smith } 1071c74985f6SBarry Smith 1072d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073c74985f6SBarry Smith { 107444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1076c74985f6SBarry Smith return 0; 1077c74985f6SBarry Smith } 1078c74985f6SBarry Smith 1079d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080c74985f6SBarry Smith { 108144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1083c74985f6SBarry Smith return 0; 1084c74985f6SBarry Smith } 1085c74985f6SBarry Smith 108639e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 108739e00950SLois Curfman McInnes { 1088154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1090154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 109239e00950SLois Curfman McInnes 1093b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094abc0e9e4SLois Curfman McInnes lrow = row - rstart; 109539e00950SLois Curfman McInnes 1096154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1098154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 109978b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 110078b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101154123eaSLois Curfman McInnes nztot = nzA + nzB; 1102154123eaSLois Curfman McInnes 1103154123eaSLois Curfman McInnes if (v || idx) { 1104154123eaSLois Curfman McInnes if (nztot) { 1105154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1106299609e3SLois Curfman McInnes int imark; 1107154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108154123eaSLois Curfman McInnes if (v) { 11090452661fSBarry Smith *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 111039e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1111154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112154123eaSLois Curfman McInnes else break; 1113154123eaSLois Curfman McInnes } 1114154123eaSLois Curfman McInnes imark = i; 1115154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117154123eaSLois Curfman McInnes } 1118154123eaSLois Curfman McInnes if (idx) { 11190452661fSBarry Smith *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1122154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123154123eaSLois Curfman McInnes else break; 1124154123eaSLois Curfman McInnes } 1125154123eaSLois Curfman McInnes imark = i; 1126154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 112839e00950SLois Curfman McInnes } 112939e00950SLois Curfman McInnes } 113039e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1131154123eaSLois Curfman McInnes } 113239e00950SLois Curfman McInnes *nz = nztot; 113378b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 113478b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 113539e00950SLois Curfman McInnes return 0; 113639e00950SLois Curfman McInnes } 113739e00950SLois Curfman McInnes 113839e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 113939e00950SLois Curfman McInnes { 11400452661fSBarry Smith if (idx) PetscFree(*idx); 11410452661fSBarry Smith if (v) PetscFree(*v); 114239e00950SLois Curfman McInnes return 0; 114339e00950SLois Curfman McInnes } 114439e00950SLois Curfman McInnes 1145cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146855ac2c5SLois Curfman McInnes { 1147855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150416022c9SBarry Smith double sum = 0.0; 115104ca555eSLois Curfman McInnes Scalar *v; 115204ca555eSLois Curfman McInnes 115317699dbbSLois Curfman McInnes if (aij->size == 1) { 115414183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 115537fa93a5SLois Curfman McInnes } else { 115604ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 115704ca555eSLois Curfman McInnes v = amat->a; 115804ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 115904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116004ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116104ca555eSLois Curfman McInnes #else 116204ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 116304ca555eSLois Curfman McInnes #endif 116404ca555eSLois Curfman McInnes } 116504ca555eSLois Curfman McInnes v = bmat->a; 116604ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 116704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 116804ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 116904ca555eSLois Curfman McInnes #else 117004ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 117104ca555eSLois Curfman McInnes #endif 117204ca555eSLois Curfman McInnes } 117304ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 117404ca555eSLois Curfman McInnes *norm = sqrt(*norm); 117504ca555eSLois Curfman McInnes } 117604ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 117704ca555eSLois Curfman McInnes double *tmp, *tmp2; 117804ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 11790452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 11800452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 118204ca555eSLois Curfman McInnes *norm = 0.0; 118304ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 118404ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1185579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 118604ca555eSLois Curfman McInnes } 118704ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 118804ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1189579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 119004ca555eSLois Curfman McInnes } 119104ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 119204ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 119304ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 119404ca555eSLois Curfman McInnes } 11950452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 119604ca555eSLois Curfman McInnes } 119704ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1198515d9167SLois Curfman McInnes double ntemp = 0.0; 119904ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1200dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 120104ca555eSLois Curfman McInnes sum = 0.0; 120204ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120404ca555eSLois Curfman McInnes } 1205dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 120604ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 120804ca555eSLois Curfman McInnes } 1209515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 121004ca555eSLois Curfman McInnes } 1211515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 121204ca555eSLois Curfman McInnes } 121304ca555eSLois Curfman McInnes else { 1214515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 121504ca555eSLois Curfman McInnes } 121637fa93a5SLois Curfman McInnes } 1217855ac2c5SLois Curfman McInnes return 0; 1218855ac2c5SLois Curfman McInnes } 1219855ac2c5SLois Curfman McInnes 12200de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221b7c46309SBarry Smith { 1222b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1225416022c9SBarry Smith Mat B; 1226b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227b7c46309SBarry Smith Scalar *array; 1228b7c46309SBarry Smith 12293638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 12303638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1231b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1232b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1233b7c46309SBarry Smith 1234b7c46309SBarry Smith /* copy over the A part */ 1235ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1236b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1237b7c46309SBarry Smith row = a->rstart; 1238dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1239b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1240416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1241b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1242b7c46309SBarry Smith } 1243b7c46309SBarry Smith aj = Aloc->j; 12444af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1245b7c46309SBarry Smith 1246b7c46309SBarry Smith /* copy over the B part */ 1247ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1248b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1249b7c46309SBarry Smith row = a->rstart; 12500452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1251dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1252b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1253416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1254b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1255b7c46309SBarry Smith } 12560452661fSBarry Smith PetscFree(ct); 1257b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1258b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12593638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12600de55854SLois Curfman McInnes *matout = B; 12610de55854SLois Curfman McInnes } else { 12620de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12630452661fSBarry Smith PetscFree(a->rowners); 12640de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12650de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12660452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12670452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12680de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1269a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12700452661fSBarry Smith PetscFree(a); 1271416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12720452661fSBarry Smith PetscHeaderDestroy(B); 12730de55854SLois Curfman McInnes } 1274b7c46309SBarry Smith return 0; 1275b7c46309SBarry Smith } 1276b7c46309SBarry Smith 1277682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1278682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1279682d7d0cSBarry Smith { 1280682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1281682d7d0cSBarry Smith 1282682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1283682d7d0cSBarry Smith else return 0; 1284682d7d0cSBarry Smith } 1285682d7d0cSBarry Smith 1286fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 12873d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 12882f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1289598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 12908a729477SBarry Smith /* -------------------------------------------------------------------*/ 12912ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 129239e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 129344a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 129444a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1295299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1296299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1297299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 129844a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1299b7c46309SBarry Smith MatTranspose_MPIAIJ, 1300a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1301855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1302ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13031eb62cbbSBarry Smith 0, 1304299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1305299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1306299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1307d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1308299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13093d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1310b49de8d1SLois Curfman McInnes 0,0,0, 1311598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1312052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1313052efed2SBarry Smith MatScale_MPIAIJ}; 13148a729477SBarry Smith 13151987afe7SBarry Smith /*@C 1316ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1317*3a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 1318*3a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 1319*3a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1320*3a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13218a729477SBarry Smith 13228a729477SBarry Smith Input Parameters: 13231eb62cbbSBarry Smith . comm - MPI communicator 13247d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13257d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13267d3e4905SLois Curfman McInnes if N is given) 13277d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13287d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13297d3e4905SLois Curfman McInnes if n is given) 1330ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1331ff756334SLois Curfman McInnes (same for all local rows) 1332ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1333ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1334ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1335ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1336ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1337ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1338ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13398a729477SBarry Smith 1340ff756334SLois Curfman McInnes Output Parameter: 13418a729477SBarry Smith . newmat - the matrix 13428a729477SBarry Smith 1343ff756334SLois Curfman McInnes Notes: 1344ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1345ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13460002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13470002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1348ff756334SLois Curfman McInnes 1349ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1350ff756334SLois Curfman McInnes (possibly both). 1351ff756334SLois Curfman McInnes 13525511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 13535511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 13545511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 13555511cfe3SLois Curfman McInnes 13565511cfe3SLois Curfman McInnes Options Database Keys: 13575511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 13585511cfe3SLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 13595511cfe3SLois Curfman McInnes 1360e0245417SLois Curfman McInnes Storage Information: 1361e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1362e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1363e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1364e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1365e0245417SLois Curfman McInnes 1366e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13675ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13685ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 13695ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 13705ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1371ff756334SLois Curfman McInnes 13725511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 13735511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 13742191d07cSBarry Smith 1375b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1376b810aeb4SBarry Smith $ ------------------- 13775511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 13785511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 13795511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 13805511cfe3SLois Curfman McInnes $ ------------------- 1381b810aeb4SBarry Smith $ 1382b810aeb4SBarry Smith 13835511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 13845511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 13855511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 13865511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 13875511cfe3SLois Curfman McInnes 13885511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 13895511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 13905511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 1391*3a511b96SLois Curfman McInnes one expects d_nz >> o_nz. For additional details, see the users manual 1392*3a511b96SLois Curfman McInnes chapter on matrices and the file $(PETSC_DIR)/Performance. 1393*3a511b96SLois Curfman McInnes 13942191d07cSBarry Smith 1395dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1396ff756334SLois Curfman McInnes 1397fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 13988a729477SBarry Smith @*/ 13991eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 14001eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 14018a729477SBarry Smith { 14028a729477SBarry Smith Mat mat; 1403416022c9SBarry Smith Mat_MPIAIJ *a; 14046abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1405416022c9SBarry Smith 14068a729477SBarry Smith *newmat = 0; 14070452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1408a5a9c739SBarry Smith PLogObjectCreate(mat); 14090452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1410416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 141144a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 141244a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 14138a729477SBarry Smith mat->factor = 0; 1414c456f294SBarry Smith mat->assembled = PETSC_FALSE; 1415d6dfbf8fSBarry Smith 141664119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 141717699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 141817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 14191eb62cbbSBarry Smith 1420b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14211987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 14221987afe7SBarry Smith 1423dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14241eb62cbbSBarry Smith work[0] = m; work[1] = n; 1425d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1426dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1427dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14281eb62cbbSBarry Smith } 142917699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 143017699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1431416022c9SBarry Smith a->m = m; 1432416022c9SBarry Smith a->n = n; 1433416022c9SBarry Smith a->N = N; 1434416022c9SBarry Smith a->M = M; 14351eb62cbbSBarry Smith 14361eb62cbbSBarry Smith /* build local table of row and column ownerships */ 14370452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1438579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 143917699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1440416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1441416022c9SBarry Smith a->rowners[0] = 0; 144217699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1443416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 14448a729477SBarry Smith } 144517699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 144617699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1447416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1448416022c9SBarry Smith a->cowners[0] = 0; 144917699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1450416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14518a729477SBarry Smith } 145217699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 145317699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14548a729477SBarry Smith 14555ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1456416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1457416022c9SBarry Smith PLogObjectParent(mat,a->A); 14587b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1459416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1460416022c9SBarry Smith PLogObjectParent(mat,a->B); 14618a729477SBarry Smith 14621eb62cbbSBarry Smith /* build cache for off array entries formed */ 1463416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1464416022c9SBarry Smith a->colmap = 0; 1465416022c9SBarry Smith a->garray = 0; 14664b0e389bSBarry Smith a->roworiented = 1; 14678a729477SBarry Smith 14681eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1469416022c9SBarry Smith a->lvec = 0; 1470416022c9SBarry Smith a->Mvctx = 0; 14718a729477SBarry Smith 1472d6dfbf8fSBarry Smith *newmat = mat; 1473d6dfbf8fSBarry Smith return 0; 1474d6dfbf8fSBarry Smith } 1475c74985f6SBarry Smith 14763d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1477d6dfbf8fSBarry Smith { 1478d6dfbf8fSBarry Smith Mat mat; 1479416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 148025cdf11fSBarry Smith int ierr, len,flg; 1481d6dfbf8fSBarry Smith 1482416022c9SBarry Smith *newmat = 0; 14830452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1484a5a9c739SBarry Smith PLogObjectCreate(mat); 14850452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1486416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 148744a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 148844a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1489d6dfbf8fSBarry Smith mat->factor = matin->factor; 1490c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1491d6dfbf8fSBarry Smith 1492416022c9SBarry Smith a->m = oldmat->m; 1493416022c9SBarry Smith a->n = oldmat->n; 1494416022c9SBarry Smith a->M = oldmat->M; 1495416022c9SBarry Smith a->N = oldmat->N; 1496d6dfbf8fSBarry Smith 1497416022c9SBarry Smith a->rstart = oldmat->rstart; 1498416022c9SBarry Smith a->rend = oldmat->rend; 1499416022c9SBarry Smith a->cstart = oldmat->cstart; 1500416022c9SBarry Smith a->cend = oldmat->cend; 150117699dbbSLois Curfman McInnes a->size = oldmat->size; 150217699dbbSLois Curfman McInnes a->rank = oldmat->rank; 150364119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1504d6dfbf8fSBarry Smith 15050452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 150617699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 150717699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1508416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15092ee70a88SLois Curfman McInnes if (oldmat->colmap) { 15100452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1511416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1512416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1513416022c9SBarry Smith } else a->colmap = 0; 1514ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 15150452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1516464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1517416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1518416022c9SBarry Smith } else a->garray = 0; 1519d6dfbf8fSBarry Smith 1520416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1521416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1522a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1523416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1524416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1525416022c9SBarry Smith PLogObjectParent(mat,a->A); 1526416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1527416022c9SBarry Smith PLogObjectParent(mat,a->B); 15285dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 152925cdf11fSBarry Smith if (flg) { 1530682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1531682d7d0cSBarry Smith } 15328a729477SBarry Smith *newmat = mat; 15338a729477SBarry Smith return 0; 15348a729477SBarry Smith } 1535416022c9SBarry Smith 1536416022c9SBarry Smith #include "sysio.h" 1537416022c9SBarry Smith 1538c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1539416022c9SBarry Smith { 1540d65a2f8fSBarry Smith Mat A; 1541416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1542d65a2f8fSBarry Smith Scalar *vals,*svals; 1543416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1544416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1545416022c9SBarry Smith MPI_Status status; 154617699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1547d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1548d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1549416022c9SBarry Smith 155017699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 155117699dbbSLois Curfman McInnes if (!rank) { 1552416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1553416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1554c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1555416022c9SBarry Smith } 1556416022c9SBarry Smith 1557416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1558416022c9SBarry Smith M = header[1]; N = header[2]; 1559416022c9SBarry Smith /* determine ownership of all rows */ 156017699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15610452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1562416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1563416022c9SBarry Smith rowners[0] = 0; 156417699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1565416022c9SBarry Smith rowners[i] += rowners[i-1]; 1566416022c9SBarry Smith } 156717699dbbSLois Curfman McInnes rstart = rowners[rank]; 156817699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1569416022c9SBarry Smith 1570416022c9SBarry Smith /* distribute row lengths to all processors */ 15710452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1572416022c9SBarry Smith offlens = ourlens + (rend-rstart); 157317699dbbSLois Curfman McInnes if (!rank) { 15740452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1575416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 15760452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 157717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1578d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 15790452661fSBarry Smith PetscFree(sndcounts); 1580416022c9SBarry Smith } 1581416022c9SBarry Smith else { 1582416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1583416022c9SBarry Smith } 1584416022c9SBarry Smith 158517699dbbSLois Curfman McInnes if (!rank) { 1586416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 15870452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1588cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 158917699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1590416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1591416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1592416022c9SBarry Smith } 1593416022c9SBarry Smith } 15940452661fSBarry Smith PetscFree(rowlengths); 1595416022c9SBarry Smith 1596416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1597416022c9SBarry Smith maxnz = 0; 159817699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 15990452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1600416022c9SBarry Smith } 16010452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1602416022c9SBarry Smith 1603416022c9SBarry Smith /* read in my part of the matrix column indices */ 1604416022c9SBarry Smith nz = procsnz[0]; 16050452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1606d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1607d65a2f8fSBarry Smith 1608d65a2f8fSBarry Smith /* read in every one elses and ship off */ 160917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1610d65a2f8fSBarry Smith nz = procsnz[i]; 1611416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1612d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1613d65a2f8fSBarry Smith } 16140452661fSBarry Smith PetscFree(cols); 1615416022c9SBarry Smith } 1616416022c9SBarry Smith else { 1617416022c9SBarry Smith /* determine buffer space needed for message */ 1618416022c9SBarry Smith nz = 0; 1619416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1620416022c9SBarry Smith nz += ourlens[i]; 1621416022c9SBarry Smith } 16220452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1623416022c9SBarry Smith 1624416022c9SBarry Smith /* receive message of column indices*/ 1625d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1626416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1627c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1628416022c9SBarry Smith } 1629416022c9SBarry Smith 1630416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1631cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1632416022c9SBarry Smith jj = 0; 1633416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1634416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1635d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1636416022c9SBarry Smith jj++; 1637416022c9SBarry Smith } 1638416022c9SBarry Smith } 1639d65a2f8fSBarry Smith 1640d65a2f8fSBarry Smith /* create our matrix */ 1641416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1642416022c9SBarry Smith ourlens[i] -= offlens[i]; 1643416022c9SBarry Smith } 1644d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1645d65a2f8fSBarry Smith A = *newmat; 1646d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1647d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1648d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1649d65a2f8fSBarry Smith } 1650416022c9SBarry Smith 165117699dbbSLois Curfman McInnes if (!rank) { 16520452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1653416022c9SBarry Smith 1654416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1655416022c9SBarry Smith nz = procsnz[0]; 1656416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1657d65a2f8fSBarry Smith 1658d65a2f8fSBarry Smith /* insert into matrix */ 1659d65a2f8fSBarry Smith jj = rstart; 1660d65a2f8fSBarry Smith smycols = mycols; 1661d65a2f8fSBarry Smith svals = vals; 1662d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1663d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1664d65a2f8fSBarry Smith smycols += ourlens[i]; 1665d65a2f8fSBarry Smith svals += ourlens[i]; 1666d65a2f8fSBarry Smith jj++; 1667416022c9SBarry Smith } 1668416022c9SBarry Smith 1669d65a2f8fSBarry Smith /* read in other processors and ship out */ 167017699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1671416022c9SBarry Smith nz = procsnz[i]; 1672416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1673d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1674416022c9SBarry Smith } 16750452661fSBarry Smith PetscFree(procsnz); 1676416022c9SBarry Smith } 1677d65a2f8fSBarry Smith else { 1678d65a2f8fSBarry Smith /* receive numeric values */ 16790452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1680416022c9SBarry Smith 1681d65a2f8fSBarry Smith /* receive message of values*/ 1682d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1683d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1684c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1685d65a2f8fSBarry Smith 1686d65a2f8fSBarry Smith /* insert into matrix */ 1687d65a2f8fSBarry Smith jj = rstart; 1688d65a2f8fSBarry Smith smycols = mycols; 1689d65a2f8fSBarry Smith svals = vals; 1690d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1691d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1692d65a2f8fSBarry Smith smycols += ourlens[i]; 1693d65a2f8fSBarry Smith svals += ourlens[i]; 1694d65a2f8fSBarry Smith jj++; 1695d65a2f8fSBarry Smith } 1696d65a2f8fSBarry Smith } 16970452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1698d65a2f8fSBarry Smith 1699d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1700d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1701416022c9SBarry Smith return 0; 1702416022c9SBarry Smith } 1703