1cb512458SBarry Smith #ifndef lint 2*7a0afa10SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.127 1996/02/23 23:06:31 curfman Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2951c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 116b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 117b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118b49de8d1SLois Curfman McInnes } 119b49de8d1SLois Curfman McInnes } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes else { 122b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 125b49de8d1SLois Curfman McInnes return 0; 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 128fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1298a729477SBarry Smith { 13044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13217699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13317699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1341eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1356abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1361eb62cbbSBarry Smith InsertMode addv; 1371eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 140d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 142bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1431eb62cbbSBarry Smith } 1441eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1451eb62cbbSBarry Smith 1461eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1470452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1490452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1501eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1511eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15217699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1531eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1541eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1558a729477SBarry Smith } 1568a729477SBarry Smith } 1578a729477SBarry Smith } 15817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1591eb62cbbSBarry Smith 1601eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1610452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16217699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16317699dbbSLois Curfman McInnes nreceives = work[rank]; 16417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16517699dbbSLois Curfman McInnes nmax = work[rank]; 1660452661fSBarry Smith PetscFree(work); 1671eb62cbbSBarry Smith 1681eb62cbbSBarry Smith /* post receives: 1691eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1701eb62cbbSBarry Smith (global index,value) we store the global index as a double 171d6dfbf8fSBarry Smith to simplify the message passing. 1721eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1731eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1741eb62cbbSBarry Smith this is a lot of wasted space. 1751eb62cbbSBarry Smith 1761eb62cbbSBarry Smith 1771eb62cbbSBarry Smith This could be done better. 1781eb62cbbSBarry Smith */ 1790452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18078b31e54SBarry Smith CHKPTRQ(rvalues); 1810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18278b31e54SBarry Smith CHKPTRQ(recv_waits); 1831eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 184416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1851eb62cbbSBarry Smith comm,recv_waits+i); 1861eb62cbbSBarry Smith } 1871eb62cbbSBarry Smith 1881eb62cbbSBarry Smith /* do sends: 1891eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1901eb62cbbSBarry Smith the ith processor 1911eb62cbbSBarry Smith */ 1920452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19478b31e54SBarry Smith CHKPTRQ(send_waits); 1950452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1961eb62cbbSBarry Smith starts[0] = 0; 19717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1981eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1991eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2001eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2011eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2021eb62cbbSBarry Smith } 2030452661fSBarry Smith PetscFree(owner); 2041eb62cbbSBarry Smith starts[0] = 0; 20517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2061eb62cbbSBarry Smith count = 0; 20717699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2081eb62cbbSBarry Smith if (procs[i]) { 209416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2101eb62cbbSBarry Smith comm,send_waits+count++); 2111eb62cbbSBarry Smith } 2121eb62cbbSBarry Smith } 2130452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2141eb62cbbSBarry Smith 2151eb62cbbSBarry Smith /* Free cache space */ 2162191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 21778b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2181eb62cbbSBarry Smith 2191eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2201eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2211eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2221eb62cbbSBarry Smith aij->rmax = nmax; 2231eb62cbbSBarry Smith 2241eb62cbbSBarry Smith return 0; 2251eb62cbbSBarry Smith } 22644a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2271eb62cbbSBarry Smith 228fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2291eb62cbbSBarry Smith { 23044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2321eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 233416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2351eb62cbbSBarry Smith Scalar *values,val; 2361eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2371eb62cbbSBarry Smith 2381eb62cbbSBarry Smith /* wait on receives */ 2391eb62cbbSBarry Smith while (count) { 240d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2411eb62cbbSBarry Smith /* unpack receives into our local space */ 242d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 243c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2441eb62cbbSBarry Smith n = n/3; 2451eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 246227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 247227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2481eb62cbbSBarry Smith val = values[3*i+2]; 2491eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2501eb62cbbSBarry Smith col -= aij->cstart; 2511eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2521eb62cbbSBarry Smith } 2531eb62cbbSBarry Smith else { 254227d817aSBarry Smith if (mat->was_assembled) { 255b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256dbb450caSBarry Smith col = aij->colmap[col] + shift; 257ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2582493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 260d6dfbf8fSBarry Smith } 2619e25ed09SBarry Smith } 2621eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2631eb62cbbSBarry Smith } 2641eb62cbbSBarry Smith } 2651eb62cbbSBarry Smith count--; 2661eb62cbbSBarry Smith } 2670452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2681eb62cbbSBarry Smith 2691eb62cbbSBarry Smith /* wait on sends */ 2701eb62cbbSBarry Smith if (aij->nsends) { 2710452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27278b31e54SBarry Smith CHKPTRQ(send_status); 2731eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2740452661fSBarry Smith PetscFree(send_status); 2751eb62cbbSBarry Smith } 2760452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2771eb62cbbSBarry Smith 27864119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 27978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2811eb62cbbSBarry Smith 2822493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2832493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 284227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2862493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2872493cbb0SBarry Smith } 2882493cbb0SBarry Smith 289227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 29078b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2915e42470aSBarry Smith } 29278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2945e42470aSBarry Smith 295*7a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 2968a729477SBarry Smith return 0; 2978a729477SBarry Smith } 2988a729477SBarry Smith 2992ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3001eb62cbbSBarry Smith { 30144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302dbd7a890SLois Curfman McInnes int ierr; 30378b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30478b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3051eb62cbbSBarry Smith return 0; 3061eb62cbbSBarry Smith } 3071eb62cbbSBarry Smith 3081eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3091eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3101eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3111eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3121eb62cbbSBarry Smith routine. 3131eb62cbbSBarry Smith */ 31444a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3151eb62cbbSBarry Smith { 31644a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 31717699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3186abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 31917699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3205392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 321d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 322d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3231eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3241eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3251eb62cbbSBarry Smith IS istmp; 3261eb62cbbSBarry Smith 32778b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 32878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3291eb62cbbSBarry Smith 3301eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3310452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 332cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3330452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3341eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3351eb62cbbSBarry Smith idx = rows[i]; 3361eb62cbbSBarry Smith found = 0; 33717699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3381eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3391eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3401eb62cbbSBarry Smith } 3411eb62cbbSBarry Smith } 342bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3431eb62cbbSBarry Smith } 34417699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3451eb62cbbSBarry Smith 3461eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3470452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 34817699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 34917699dbbSLois Curfman McInnes nrecvs = work[rank]; 35017699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35117699dbbSLois Curfman McInnes nmax = work[rank]; 3520452661fSBarry Smith PetscFree(work); 3531eb62cbbSBarry Smith 3541eb62cbbSBarry Smith /* post receives: */ 3550452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 35678b31e54SBarry Smith CHKPTRQ(rvalues); 3570452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 35878b31e54SBarry Smith CHKPTRQ(recv_waits); 3591eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 360416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3611eb62cbbSBarry Smith } 3621eb62cbbSBarry Smith 3631eb62cbbSBarry Smith /* do sends: 3641eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3651eb62cbbSBarry Smith the ith processor 3661eb62cbbSBarry Smith */ 3670452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3680452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36978b31e54SBarry Smith CHKPTRQ(send_waits); 3700452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3711eb62cbbSBarry Smith starts[0] = 0; 37217699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3731eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3741eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3751eb62cbbSBarry Smith } 3761eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3771eb62cbbSBarry Smith 3781eb62cbbSBarry Smith starts[0] = 0; 37917699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3801eb62cbbSBarry Smith count = 0; 38117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3821eb62cbbSBarry Smith if (procs[i]) { 383416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith } 3860452661fSBarry Smith PetscFree(starts); 3871eb62cbbSBarry Smith 38817699dbbSLois Curfman McInnes base = owners[rank]; 3891eb62cbbSBarry Smith 3901eb62cbbSBarry Smith /* wait on receives */ 3910452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3921eb62cbbSBarry Smith source = lens + nrecvs; 3931eb62cbbSBarry Smith count = nrecvs; slen = 0; 3941eb62cbbSBarry Smith while (count) { 395d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3961eb62cbbSBarry Smith /* unpack receives into our local space */ 3971eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 398d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 399d6dfbf8fSBarry Smith lens[imdex] = n; 4001eb62cbbSBarry Smith slen += n; 4011eb62cbbSBarry Smith count--; 4021eb62cbbSBarry Smith } 4030452661fSBarry Smith PetscFree(recv_waits); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith /* move the data into the send scatter */ 4060452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4071eb62cbbSBarry Smith count = 0; 4081eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4091eb62cbbSBarry Smith values = rvalues + i*nmax; 4101eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4111eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4121eb62cbbSBarry Smith } 4131eb62cbbSBarry Smith } 4140452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4150452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4161eb62cbbSBarry Smith 4171eb62cbbSBarry Smith /* actually zap the local rows */ 418416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419464493b3SBarry Smith PLogObjectParent(A,istmp); 4200452661fSBarry Smith PetscFree(lrows); 42178b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42278b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42378b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4241eb62cbbSBarry Smith 4251eb62cbbSBarry Smith /* wait on sends */ 4261eb62cbbSBarry Smith if (nsends) { 4270452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 42878b31e54SBarry Smith CHKPTRQ(send_status); 4291eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4300452661fSBarry Smith PetscFree(send_status); 4311eb62cbbSBarry Smith } 4320452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4331eb62cbbSBarry Smith 4341eb62cbbSBarry Smith return 0; 4351eb62cbbSBarry Smith } 4361eb62cbbSBarry Smith 437416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4381eb62cbbSBarry Smith { 439416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 4401eb62cbbSBarry Smith int ierr; 441416022c9SBarry Smith 44264119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443416022c9SBarry Smith ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 44464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4461eb62cbbSBarry Smith return 0; 4471eb62cbbSBarry Smith } 4481eb62cbbSBarry Smith 449416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450da3a660dSBarry Smith { 451416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452da3a660dSBarry Smith int ierr; 45364119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454416022c9SBarry Smith ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 45564119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456416022c9SBarry Smith ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457da3a660dSBarry Smith return 0; 458da3a660dSBarry Smith } 459da3a660dSBarry Smith 460416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461da3a660dSBarry Smith { 462416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463da3a660dSBarry Smith int ierr; 464da3a660dSBarry Smith 465da3a660dSBarry Smith /* do nondiagonal part */ 466416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 467da3a660dSBarry Smith /* send it on its way */ 468416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 46964119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470da3a660dSBarry Smith /* do local part */ 471416022c9SBarry Smith ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 472da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 473da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 474da3a660dSBarry Smith /* but is not perhaps always true. */ 475416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 47664119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477da3a660dSBarry Smith return 0; 478da3a660dSBarry Smith } 479da3a660dSBarry Smith 480416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481da3a660dSBarry Smith { 482416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483da3a660dSBarry Smith int ierr; 484da3a660dSBarry Smith 485da3a660dSBarry Smith /* do nondiagonal part */ 486416022c9SBarry Smith ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 487da3a660dSBarry Smith /* send it on its way */ 488416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 48964119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490da3a660dSBarry Smith /* do local part */ 491416022c9SBarry Smith ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 492da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 493da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 494da3a660dSBarry Smith /* but is not perhaps always true. */ 495416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 49664119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497da3a660dSBarry Smith return 0; 498da3a660dSBarry Smith } 499da3a660dSBarry Smith 5001eb62cbbSBarry Smith /* 5011eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5021eb62cbbSBarry Smith diagonal block 5031eb62cbbSBarry Smith */ 504416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5051eb62cbbSBarry Smith { 506416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5081eb62cbbSBarry Smith } 5091eb62cbbSBarry Smith 510052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 511052efed2SBarry Smith { 512052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 513052efed2SBarry Smith int ierr; 514052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 515052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 516052efed2SBarry Smith return 0; 517052efed2SBarry Smith } 518052efed2SBarry Smith 51944a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5201eb62cbbSBarry Smith { 5211eb62cbbSBarry Smith Mat mat = (Mat) obj; 52244a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5231eb62cbbSBarry Smith int ierr; 524a5a9c739SBarry Smith #if defined(PETSC_LOG) 5256e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 526a5a9c739SBarry Smith #endif 5270452661fSBarry Smith PetscFree(aij->rowners); 52878b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 52978b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5300452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5310452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5321eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 533a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 534*7a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5350452661fSBarry Smith PetscFree(aij); 536a5a9c739SBarry Smith PLogObjectDestroy(mat); 5370452661fSBarry Smith PetscHeaderDestroy(mat); 5381eb62cbbSBarry Smith return 0; 5391eb62cbbSBarry Smith } 5407857610eSBarry Smith #include "draw.h" 541b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 542ee50ffe9SBarry Smith 543416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5441eb62cbbSBarry Smith { 545416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 546416022c9SBarry Smith int ierr; 547416022c9SBarry Smith 54817699dbbSLois Curfman McInnes if (aij->size == 1) { 549416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 550416022c9SBarry Smith } 551a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 552416022c9SBarry Smith return 0; 553416022c9SBarry Smith } 554416022c9SBarry Smith 555d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 556416022c9SBarry Smith { 55744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 558dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 559a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 560d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 561d636dbe3SBarry Smith FILE *fd; 562416022c9SBarry Smith 563416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 564416022c9SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 56508480c60SBarry Smith if (format == FILE_FORMAT_INFO_DETAILED) { 56695e01e2fSLois Curfman McInnes int nz, nzalloc, mem, flg; 567a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 568227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 569a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 57095e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 571a56f8943SBarry Smith MPIU_Seq_begin(mat->comm,1); 57295e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 57395e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 57495e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 57595e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 57608480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 57795e01e2fSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 57808480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 57995e01e2fSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 580a56f8943SBarry Smith fflush(fd); 581a56f8943SBarry Smith MPIU_Seq_end(mat->comm,1); 582a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 583416022c9SBarry Smith return 0; 584416022c9SBarry Smith } 58508480c60SBarry Smith else if (format == FILE_FORMAT_INFO) { 58608480c60SBarry Smith return 0; 58708480c60SBarry Smith } 588416022c9SBarry Smith } 589416022c9SBarry Smith 590416022c9SBarry Smith if (vobj->type == ASCII_FILE_VIEWER) { 591227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 5927f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 593d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 59417699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5951eb62cbbSBarry Smith aij->cend); 59678b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 59778b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 598d13ab20cSBarry Smith fflush(fd); 5997f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 600d13ab20cSBarry Smith } 601416022c9SBarry Smith else { 602a56f8943SBarry Smith int size = aij->size; 603a56f8943SBarry Smith rank = aij->rank; 60417699dbbSLois Curfman McInnes if (size == 1) { 60578b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 60695373324SBarry Smith } 60795373324SBarry Smith else { 60895373324SBarry Smith /* assemble the entire matrix onto first processor. */ 60995373324SBarry Smith Mat A; 610ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6112eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 61295373324SBarry Smith Scalar *a; 6132ee70a88SLois Curfman McInnes 61417699dbbSLois Curfman McInnes if (!rank) { 615b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 616c451ab25SLois Curfman McInnes CHKERRQ(ierr); 61795373324SBarry Smith } 61895373324SBarry Smith else { 619b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 620c451ab25SLois Curfman McInnes CHKERRQ(ierr); 62195373324SBarry Smith } 622464493b3SBarry Smith PLogObjectParent(mat,A); 623416022c9SBarry Smith 62495373324SBarry Smith /* copy over the A part */ 625ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6262ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 62795373324SBarry Smith row = aij->rstart; 628dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 62995373324SBarry Smith for ( i=0; i<m; i++ ) { 630416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 63195373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 63295373324SBarry Smith } 6332ee70a88SLois Curfman McInnes aj = Aloc->j; 634dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 63595373324SBarry Smith 63695373324SBarry Smith /* copy over the B part */ 637ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6382ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 63995373324SBarry Smith row = aij->rstart; 6400452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 641dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 64295373324SBarry Smith for ( i=0; i<m; i++ ) { 643416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 64495373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 64595373324SBarry Smith } 6460452661fSBarry Smith PetscFree(ct); 64778b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64878b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 64917699dbbSLois Curfman McInnes if (!rank) { 65078b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 65195373324SBarry Smith } 65278b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 65395373324SBarry Smith } 65495373324SBarry Smith } 6551eb62cbbSBarry Smith return 0; 6561eb62cbbSBarry Smith } 6571eb62cbbSBarry Smith 658416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 659416022c9SBarry Smith { 660416022c9SBarry Smith Mat mat = (Mat) obj; 661416022c9SBarry Smith int ierr; 662416022c9SBarry Smith PetscObject vobj = (PetscObject) viewer; 663416022c9SBarry Smith 664416022c9SBarry Smith if (!viewer) { 665416022c9SBarry Smith viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 666416022c9SBarry Smith } 667416022c9SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 668416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 669d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670416022c9SBarry Smith } 671416022c9SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 672d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673d7e8b826SBarry Smith } 674d7e8b826SBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 675d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 676416022c9SBarry Smith } 677416022c9SBarry Smith else if (vobj->cookie == DRAW_COOKIE) { 678d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 679416022c9SBarry Smith } 680416022c9SBarry Smith else if (vobj->type == BINARY_FILE_VIEWER) { 681416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 682416022c9SBarry Smith } 683416022c9SBarry Smith return 0; 684416022c9SBarry Smith } 685416022c9SBarry Smith 686ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6871eb62cbbSBarry Smith /* 6881eb62cbbSBarry Smith This has to provide several versions. 6891eb62cbbSBarry Smith 6901eb62cbbSBarry Smith 1) per sequential 6911eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6921eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 693d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6941eb62cbbSBarry Smith */ 695fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 696dbb450caSBarry Smith double fshift,int its,Vec xx) 6978a729477SBarry Smith { 69844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 699d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 700ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 7016abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 7026abc6512SBarry Smith int ierr,*idx, *diag; 703416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 704da3a660dSBarry Smith Vec tt; 7058a729477SBarry Smith 706d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 707dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 708dbb450caSBarry Smith ls = ls + shift; 709ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 710d6dfbf8fSBarry Smith diag = A->diag; 711acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 71248d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 713acb40c82SBarry Smith } 714da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 715da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 716da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 717da3a660dSBarry Smith 718da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 719da3a660dSBarry Smith 720da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 721da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 722da3a660dSBarry Smith is the relaxation factor. 723da3a660dSBarry Smith */ 72478b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 725464493b3SBarry Smith PLogObjectParent(matin,tt); 726da3a660dSBarry Smith VecGetArray(tt,&t); 727da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 728da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 729da3a660dSBarry Smith VecSet(&zero,mat->lvec); 73064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 73178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 732da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 733da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 734dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 735dbb450caSBarry Smith v = A->a + diag[i] + !shift; 736da3a660dSBarry Smith sum = b[i]; 737da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 738dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 739da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 740dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 741dbb450caSBarry Smith v = B->a + B->i[i] + shift; 742da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 743da3a660dSBarry Smith x[i] = omega*(sum/d); 744da3a660dSBarry Smith } 74564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 747da3a660dSBarry Smith 748da3a660dSBarry Smith /* t = b - (2*E - D)x */ 749da3a660dSBarry Smith v = A->a; 750dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 751da3a660dSBarry Smith 752da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 753dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 754da3a660dSBarry Smith diag = A->diag; 755da3a660dSBarry Smith VecSet(&zero,mat->lvec); 75664119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 75778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 758da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 759da3a660dSBarry Smith n = diag[i] - A->i[i]; 760dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 761dbb450caSBarry Smith v = A->a + A->i[i] + shift; 762da3a660dSBarry Smith sum = t[i]; 763da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 764dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 765da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 766dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 767dbb450caSBarry Smith v = B->a + B->i[i] + shift; 768da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 769da3a660dSBarry Smith t[i] = omega*(sum/d); 770da3a660dSBarry Smith } 77164119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 77278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 773da3a660dSBarry Smith /* x = x + t */ 774da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 775da3a660dSBarry Smith VecDestroy(tt); 776da3a660dSBarry Smith return 0; 777da3a660dSBarry Smith } 778da3a660dSBarry Smith 7791eb62cbbSBarry Smith 780d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 781da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 782da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 783da3a660dSBarry Smith } 784da3a660dSBarry Smith else { 78564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78678b31e54SBarry Smith CHKERRQ(ierr); 78764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 78878b31e54SBarry Smith CHKERRQ(ierr); 789da3a660dSBarry Smith } 790d6dfbf8fSBarry Smith while (its--) { 791d6dfbf8fSBarry Smith /* go down through the rows */ 79264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 79378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 794d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 795d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 796dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 797dbb450caSBarry Smith v = A->a + A->i[i] + shift; 798d6dfbf8fSBarry Smith sum = b[i]; 799d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 800dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 801d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 802dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 803dbb450caSBarry Smith v = B->a + B->i[i] + shift; 804d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 805d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 806d6dfbf8fSBarry Smith } 80764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 809d6dfbf8fSBarry Smith /* come up through the rows */ 81064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 81178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 812d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 813d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 814dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 815dbb450caSBarry Smith v = A->a + A->i[i] + shift; 816d6dfbf8fSBarry Smith sum = b[i]; 817d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 818dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 819d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 820dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 821dbb450caSBarry Smith v = B->a + B->i[i] + shift; 822d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 823d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 824d6dfbf8fSBarry Smith } 82564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 827d6dfbf8fSBarry Smith } 828d6dfbf8fSBarry Smith } 829d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 830da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 831da3a660dSBarry Smith VecSet(&zero,mat->lvec); 83264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 83378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 834da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 835da3a660dSBarry Smith n = diag[i] - A->i[i]; 836dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 837dbb450caSBarry Smith v = A->a + A->i[i] + shift; 838da3a660dSBarry Smith sum = b[i]; 839da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 840dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 841da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 842dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 843dbb450caSBarry Smith v = B->a + B->i[i] + shift; 844da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 845da3a660dSBarry Smith x[i] = omega*(sum/d); 846da3a660dSBarry Smith } 84764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 849da3a660dSBarry Smith its--; 850da3a660dSBarry Smith } 851d6dfbf8fSBarry Smith while (its--) { 85264119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 85378b31e54SBarry Smith CHKERRQ(ierr); 85464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 85578b31e54SBarry Smith CHKERRQ(ierr); 85664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 858d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 859d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 860dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 861dbb450caSBarry Smith v = A->a + A->i[i] + shift; 862d6dfbf8fSBarry Smith sum = b[i]; 863d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 864dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 865d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 866dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 867dbb450caSBarry Smith v = B->a + B->i[i] + shift; 868d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 869d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 870d6dfbf8fSBarry Smith } 87164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 87278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 873d6dfbf8fSBarry Smith } 874d6dfbf8fSBarry Smith } 875d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 876da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 877da3a660dSBarry Smith VecSet(&zero,mat->lvec); 87864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 87978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 880da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 881da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 882dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 883dbb450caSBarry Smith v = A->a + diag[i] + !shift; 884da3a660dSBarry Smith sum = b[i]; 885da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 886dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 887da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 888dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 889dbb450caSBarry Smith v = B->a + B->i[i] + shift; 890da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 891da3a660dSBarry Smith x[i] = omega*(sum/d); 892da3a660dSBarry Smith } 89364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 89478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 895da3a660dSBarry Smith its--; 896da3a660dSBarry Smith } 897d6dfbf8fSBarry Smith while (its--) { 89864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 89978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 90064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 90178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 90264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 90378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 904d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 905d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 906dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 907dbb450caSBarry Smith v = A->a + A->i[i] + shift; 908d6dfbf8fSBarry Smith sum = b[i]; 909d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 910dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 911d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 912dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 913dbb450caSBarry Smith v = B->a + B->i[i] + shift; 914d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 915d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 916d6dfbf8fSBarry Smith } 91764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 919d6dfbf8fSBarry Smith } 920d6dfbf8fSBarry Smith } 921d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 922da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 923dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 924da3a660dSBarry Smith } 92564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92678b31e54SBarry Smith CHKERRQ(ierr); 92764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 92878b31e54SBarry Smith CHKERRQ(ierr); 929d6dfbf8fSBarry Smith while (its--) { 930d6dfbf8fSBarry Smith /* go down through the rows */ 931d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 932d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 933dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 934dbb450caSBarry Smith v = A->a + A->i[i] + shift; 935d6dfbf8fSBarry Smith sum = b[i]; 936d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 937dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 938d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 939dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 940dbb450caSBarry Smith v = B->a + B->i[i] + shift; 941d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 942d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 943d6dfbf8fSBarry Smith } 944d6dfbf8fSBarry Smith /* come up through the rows */ 945d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 946d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 947dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 948dbb450caSBarry Smith v = A->a + A->i[i] + shift; 949d6dfbf8fSBarry Smith sum = b[i]; 950d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 951dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 952d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 953dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 954dbb450caSBarry Smith v = B->a + B->i[i] + shift; 955d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 956d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 957d6dfbf8fSBarry Smith } 958d6dfbf8fSBarry Smith } 959d6dfbf8fSBarry Smith } 960d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 962dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 963da3a660dSBarry Smith } 96464119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96578b31e54SBarry Smith CHKERRQ(ierr); 96664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 96778b31e54SBarry Smith CHKERRQ(ierr); 968d6dfbf8fSBarry Smith while (its--) { 969d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 970d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 971dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 972dbb450caSBarry Smith v = A->a + A->i[i] + shift; 973d6dfbf8fSBarry Smith sum = b[i]; 974d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 975dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 976d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 977dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 978dbb450caSBarry Smith v = B->a + B->i[i] + shift; 979d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 980d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 981d6dfbf8fSBarry Smith } 982d6dfbf8fSBarry Smith } 983d6dfbf8fSBarry Smith } 984d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 986dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 987da3a660dSBarry Smith } 98864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 98978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 99064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 99178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 992d6dfbf8fSBarry Smith while (its--) { 993d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 994d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 995dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 996dbb450caSBarry Smith v = A->a + A->i[i] + shift; 997d6dfbf8fSBarry Smith sum = b[i]; 998d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 999dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1000d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1001dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1002dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1003d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1005d6dfbf8fSBarry Smith } 1006d6dfbf8fSBarry Smith } 1007d6dfbf8fSBarry Smith } 10088a729477SBarry Smith return 0; 10098a729477SBarry Smith } 1010a66be287SLois Curfman McInnes 1011fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1012fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1013a66be287SLois Curfman McInnes { 1014a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1015a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1016a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1017a66be287SLois Curfman McInnes 101878b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 101978b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1020a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1021a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1022a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1023a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1024d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1025a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1026a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1027d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1028a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1029a66be287SLois Curfman McInnes } 1030a66be287SLois Curfman McInnes return 0; 1031a66be287SLois Curfman McInnes } 1032a66be287SLois Curfman McInnes 1033299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1034299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1035299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1036299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1037299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1038299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1039299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1040299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1041299609e3SLois Curfman McInnes 1042416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1043c74985f6SBarry Smith { 1044c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1045c74985f6SBarry Smith 1046c0bbcb79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 1047c0bbcb79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 1048c0bbcb79SLois Curfman McInnes op == COLUMNS_SORTED || 1049c0bbcb79SLois Curfman McInnes op == ROW_ORIENTED) { 1050c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1051c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1052c74985f6SBarry Smith } 1053c0bbcb79SLois Curfman McInnes else if (op == ROWS_SORTED || 1054c0bbcb79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 1055c0bbcb79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 1056c0bbcb79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 1057c0bbcb79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10584b0e389bSBarry Smith else if (op == COLUMN_ORIENTED) { 10594b0e389bSBarry Smith a->roworiented = 0; 10604b0e389bSBarry Smith MatSetOption(a->A,op); 10614b0e389bSBarry Smith MatSetOption(a->B,op); 10624b0e389bSBarry Smith } 1063c0bbcb79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 10644d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1065c0bbcb79SLois Curfman McInnes else 10664d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1067c74985f6SBarry Smith return 0; 1068c74985f6SBarry Smith } 1069c74985f6SBarry Smith 1070d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1071c74985f6SBarry Smith { 107244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1073c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1074c74985f6SBarry Smith return 0; 1075c74985f6SBarry Smith } 1076c74985f6SBarry Smith 1077d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1078c74985f6SBarry Smith { 107944a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1081c74985f6SBarry Smith return 0; 1082c74985f6SBarry Smith } 1083c74985f6SBarry Smith 1084d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1085c74985f6SBarry Smith { 108644a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1088c74985f6SBarry Smith return 0; 1089c74985f6SBarry Smith } 1090c74985f6SBarry Smith 109139e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 109239e00950SLois Curfman McInnes { 1093154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1095154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1096154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 109739e00950SLois Curfman McInnes 1098*7a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1099*7a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 1100*7a0afa10SBarry Smith 1101*7a0afa10SBarry Smith if (!mat->rowvalues) { 1102*7a0afa10SBarry Smith /* 1103*7a0afa10SBarry Smith allocate enough space to hold information from the longest row. 1104*7a0afa10SBarry Smith */ 1105*7a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1106*7a0afa10SBarry Smith int max = 1,n = mat->n,tmp; 1107*7a0afa10SBarry Smith for ( i=0; i<n; i++ ) { 1108*7a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1109*7a0afa10SBarry Smith if (max < tmp) { max = tmp; } 1110*7a0afa10SBarry Smith } 1111*7a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1112*7a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 1113*7a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 1114*7a0afa10SBarry Smith } 1115*7a0afa10SBarry Smith 1116*7a0afa10SBarry Smith 1117b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1118abc0e9e4SLois Curfman McInnes lrow = row - rstart; 111939e00950SLois Curfman McInnes 1120154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1121154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1122154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 112378b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 112478b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1125154123eaSLois Curfman McInnes nztot = nzA + nzB; 1126154123eaSLois Curfman McInnes 1127154123eaSLois Curfman McInnes if (v || idx) { 1128154123eaSLois Curfman McInnes if (nztot) { 1129154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1130299609e3SLois Curfman McInnes int imark; 1131154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1132154123eaSLois Curfman McInnes if (v) { 1133*7a0afa10SBarry Smith *v = mat->rowvalues; 113439e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1135154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1136154123eaSLois Curfman McInnes else break; 1137154123eaSLois Curfman McInnes } 1138154123eaSLois Curfman McInnes imark = i; 1139154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1140299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1141154123eaSLois Curfman McInnes } 1142154123eaSLois Curfman McInnes if (idx) { 1143*7a0afa10SBarry Smith *idx = mat->rowindices; 1144154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1145154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1146154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1147154123eaSLois Curfman McInnes else break; 1148154123eaSLois Curfman McInnes } 1149154123eaSLois Curfman McInnes imark = i; 1150154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1151299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 115239e00950SLois Curfman McInnes } 115339e00950SLois Curfman McInnes } 115439e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1155154123eaSLois Curfman McInnes } 115639e00950SLois Curfman McInnes *nz = nztot; 115778b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 115878b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 115939e00950SLois Curfman McInnes return 0; 116039e00950SLois Curfman McInnes } 116139e00950SLois Curfman McInnes 116239e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 116339e00950SLois Curfman McInnes { 1164*7a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1165*7a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 1166*7a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1167*7a0afa10SBarry Smith } 1168*7a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 116939e00950SLois Curfman McInnes return 0; 117039e00950SLois Curfman McInnes } 117139e00950SLois Curfman McInnes 1172cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1173855ac2c5SLois Curfman McInnes { 1174855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1175ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1176416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1177416022c9SBarry Smith double sum = 0.0; 117804ca555eSLois Curfman McInnes Scalar *v; 117904ca555eSLois Curfman McInnes 118017699dbbSLois Curfman McInnes if (aij->size == 1) { 118114183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 118237fa93a5SLois Curfman McInnes } else { 118304ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 118404ca555eSLois Curfman McInnes v = amat->a; 118504ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 118604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 118704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 118804ca555eSLois Curfman McInnes #else 118904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 119004ca555eSLois Curfman McInnes #endif 119104ca555eSLois Curfman McInnes } 119204ca555eSLois Curfman McInnes v = bmat->a; 119304ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 119404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 119504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 119604ca555eSLois Curfman McInnes #else 119704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 119804ca555eSLois Curfman McInnes #endif 119904ca555eSLois Curfman McInnes } 120004ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 120104ca555eSLois Curfman McInnes *norm = sqrt(*norm); 120204ca555eSLois Curfman McInnes } 120304ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 120404ca555eSLois Curfman McInnes double *tmp, *tmp2; 120504ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 12060452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 12070452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1208cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 120904ca555eSLois Curfman McInnes *norm = 0.0; 121004ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 121104ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1212579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 121304ca555eSLois Curfman McInnes } 121404ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 121504ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1216579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 121704ca555eSLois Curfman McInnes } 121804ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 121904ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 122004ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 122104ca555eSLois Curfman McInnes } 12220452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 122304ca555eSLois Curfman McInnes } 122404ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1225515d9167SLois Curfman McInnes double ntemp = 0.0; 122604ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1227dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 122804ca555eSLois Curfman McInnes sum = 0.0; 122904ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1230cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 123104ca555eSLois Curfman McInnes } 1232dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 123304ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1234cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 123504ca555eSLois Curfman McInnes } 1236515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 123704ca555eSLois Curfman McInnes } 1238515d9167SLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 123904ca555eSLois Curfman McInnes } 124004ca555eSLois Curfman McInnes else { 1241515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 124204ca555eSLois Curfman McInnes } 124337fa93a5SLois Curfman McInnes } 1244855ac2c5SLois Curfman McInnes return 0; 1245855ac2c5SLois Curfman McInnes } 1246855ac2c5SLois Curfman McInnes 12470de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1248b7c46309SBarry Smith { 1249b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1250dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1251416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1252416022c9SBarry Smith Mat B; 1253b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1254b7c46309SBarry Smith Scalar *array; 1255b7c46309SBarry Smith 12563638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 12573638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1258b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1259b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1260b7c46309SBarry Smith 1261b7c46309SBarry Smith /* copy over the A part */ 1262ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1263b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1264b7c46309SBarry Smith row = a->rstart; 1265dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1266b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1267416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1268b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1269b7c46309SBarry Smith } 1270b7c46309SBarry Smith aj = Aloc->j; 12714af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1272b7c46309SBarry Smith 1273b7c46309SBarry Smith /* copy over the B part */ 1274ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1275b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1276b7c46309SBarry Smith row = a->rstart; 12770452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1278dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1279b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1280416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1281b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1282b7c46309SBarry Smith } 12830452661fSBarry Smith PetscFree(ct); 1284b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1285b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12863638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 12870de55854SLois Curfman McInnes *matout = B; 12880de55854SLois Curfman McInnes } else { 12890de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12900452661fSBarry Smith PetscFree(a->rowners); 12910de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12920de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12930452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 12940452661fSBarry Smith if (a->garray) PetscFree(a->garray); 12950de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1296a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 12970452661fSBarry Smith PetscFree(a); 1298416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 12990452661fSBarry Smith PetscHeaderDestroy(B); 13000de55854SLois Curfman McInnes } 1301b7c46309SBarry Smith return 0; 1302b7c46309SBarry Smith } 1303b7c46309SBarry Smith 1304682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1305682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1306682d7d0cSBarry Smith { 1307682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1308682d7d0cSBarry Smith 1309682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1310682d7d0cSBarry Smith else return 0; 1311682d7d0cSBarry Smith } 1312682d7d0cSBarry Smith 1313fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 13143d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 13152f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1316598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 13178a729477SBarry Smith /* -------------------------------------------------------------------*/ 13182ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 131939e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 132044a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 132144a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1322299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1323299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1324299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 132544a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1326b7c46309SBarry Smith MatTranspose_MPIAIJ, 1327a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1328855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1329ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13301eb62cbbSBarry Smith 0, 1331299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1332299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1333299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1334d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1335299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13363d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1337b49de8d1SLois Curfman McInnes 0,0,0, 1338598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1339052efed2SBarry Smith MatPrintHelp_MPIAIJ, 1340052efed2SBarry Smith MatScale_MPIAIJ}; 13418a729477SBarry Smith 13421987afe7SBarry Smith /*@C 1343ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13443a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 13453a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 13463a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 13473a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13488a729477SBarry Smith 13498a729477SBarry Smith Input Parameters: 13501eb62cbbSBarry Smith . comm - MPI communicator 13517d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 13527d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 13537d3e4905SLois Curfman McInnes if N is given) 13547d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 13557d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 13567d3e4905SLois Curfman McInnes if n is given) 1357ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1358ff756334SLois Curfman McInnes (same for all local rows) 1359ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1360ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1361ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1362ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1363ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1364ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1365ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 13668a729477SBarry Smith 1367ff756334SLois Curfman McInnes Output Parameter: 13688a729477SBarry Smith . newmat - the matrix 13698a729477SBarry Smith 1370ff756334SLois Curfman McInnes Notes: 1371ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1372ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 13730002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 13740002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1375ff756334SLois Curfman McInnes 1376ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1377ff756334SLois Curfman McInnes (possibly both). 1378ff756334SLois Curfman McInnes 13795511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 13805511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 13815511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 13825511cfe3SLois Curfman McInnes 13835511cfe3SLois Curfman McInnes Options Database Keys: 13845511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 13856e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 13866e62573dSLois Curfman McInnes $ (max limit=5) 13876e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 13886e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 13896e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 13905511cfe3SLois Curfman McInnes 1391e0245417SLois Curfman McInnes Storage Information: 1392e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1393e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1394e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1395e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1396e0245417SLois Curfman McInnes 1397e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 13985ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 13995ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 14005ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 14015ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1402ff756334SLois Curfman McInnes 14035511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 14045511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 14052191d07cSBarry Smith 1406b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1407b810aeb4SBarry Smith $ ------------------- 14085511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 14095511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 14105511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 14115511cfe3SLois Curfman McInnes $ ------------------- 1412b810aeb4SBarry Smith $ 1413b810aeb4SBarry Smith 14145511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 14155511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 14165511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 14175511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 14185511cfe3SLois Curfman McInnes 14195511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 14205511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 14215511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 14223a511b96SLois Curfman McInnes one expects d_nz >> o_nz. For additional details, see the users manual 14233a511b96SLois Curfman McInnes chapter on matrices and the file $(PETSC_DIR)/Performance. 14243a511b96SLois Curfman McInnes 1425dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1426ff756334SLois Curfman McInnes 1427fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14288a729477SBarry Smith @*/ 14291eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 14301eb62cbbSBarry Smith int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 14318a729477SBarry Smith { 14328a729477SBarry Smith Mat mat; 1433416022c9SBarry Smith Mat_MPIAIJ *a; 14346abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1435416022c9SBarry Smith 14368a729477SBarry Smith *newmat = 0; 14370452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1438a5a9c739SBarry Smith PLogObjectCreate(mat); 14390452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1440416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 144144a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 144244a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 14438a729477SBarry Smith mat->factor = 0; 1444c456f294SBarry Smith mat->assembled = PETSC_FALSE; 1445d6dfbf8fSBarry Smith 144664119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 144717699dbbSLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 144817699dbbSLois Curfman McInnes MPI_Comm_size(comm,&a->size); 14491eb62cbbSBarry Smith 1450b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14511987afe7SBarry Smith SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 14521987afe7SBarry Smith 1453dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14541eb62cbbSBarry Smith work[0] = m; work[1] = n; 1455d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1456dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1457dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14581eb62cbbSBarry Smith } 145917699dbbSLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 146017699dbbSLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1461416022c9SBarry Smith a->m = m; 1462416022c9SBarry Smith a->n = n; 1463416022c9SBarry Smith a->N = N; 1464416022c9SBarry Smith a->M = M; 14651eb62cbbSBarry Smith 14661eb62cbbSBarry Smith /* build local table of row and column ownerships */ 14670452661fSBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1468579c6b6fSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 146917699dbbSLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1470416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1471416022c9SBarry Smith a->rowners[0] = 0; 147217699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1473416022c9SBarry Smith a->rowners[i] += a->rowners[i-1]; 14748a729477SBarry Smith } 147517699dbbSLois Curfman McInnes a->rstart = a->rowners[a->rank]; 147617699dbbSLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1477416022c9SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1478416022c9SBarry Smith a->cowners[0] = 0; 147917699dbbSLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1480416022c9SBarry Smith a->cowners[i] += a->cowners[i-1]; 14818a729477SBarry Smith } 148217699dbbSLois Curfman McInnes a->cstart = a->cowners[a->rank]; 148317699dbbSLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 14848a729477SBarry Smith 14855ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 1486416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1487416022c9SBarry Smith PLogObjectParent(mat,a->A); 14887b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 1489416022c9SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1490416022c9SBarry Smith PLogObjectParent(mat,a->B); 14918a729477SBarry Smith 14921eb62cbbSBarry Smith /* build cache for off array entries formed */ 1493416022c9SBarry Smith ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1494416022c9SBarry Smith a->colmap = 0; 1495416022c9SBarry Smith a->garray = 0; 14964b0e389bSBarry Smith a->roworiented = 1; 14978a729477SBarry Smith 14981eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 1499416022c9SBarry Smith a->lvec = 0; 1500416022c9SBarry Smith a->Mvctx = 0; 15018a729477SBarry Smith 1502*7a0afa10SBarry Smith /* stuff for MatGetRow() */ 1503*7a0afa10SBarry Smith a->rowindices = 0; 1504*7a0afa10SBarry Smith a->rowvalues = 0; 1505*7a0afa10SBarry Smith a->getrowactive = PETSC_FALSE; 1506*7a0afa10SBarry Smith 1507d6dfbf8fSBarry Smith *newmat = mat; 1508d6dfbf8fSBarry Smith return 0; 1509d6dfbf8fSBarry Smith } 1510c74985f6SBarry Smith 15113d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1512d6dfbf8fSBarry Smith { 1513d6dfbf8fSBarry Smith Mat mat; 1514416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 151525cdf11fSBarry Smith int ierr, len,flg; 1516d6dfbf8fSBarry Smith 1517416022c9SBarry Smith *newmat = 0; 15180452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1519a5a9c739SBarry Smith PLogObjectCreate(mat); 15200452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1521416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 152244a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 152344a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1524d6dfbf8fSBarry Smith mat->factor = matin->factor; 1525c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1526d6dfbf8fSBarry Smith 1527416022c9SBarry Smith a->m = oldmat->m; 1528416022c9SBarry Smith a->n = oldmat->n; 1529416022c9SBarry Smith a->M = oldmat->M; 1530416022c9SBarry Smith a->N = oldmat->N; 1531d6dfbf8fSBarry Smith 1532416022c9SBarry Smith a->rstart = oldmat->rstart; 1533416022c9SBarry Smith a->rend = oldmat->rend; 1534416022c9SBarry Smith a->cstart = oldmat->cstart; 1535416022c9SBarry Smith a->cend = oldmat->cend; 153617699dbbSLois Curfman McInnes a->size = oldmat->size; 153717699dbbSLois Curfman McInnes a->rank = oldmat->rank; 153864119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1539d6dfbf8fSBarry Smith 15400452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 154117699dbbSLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 154217699dbbSLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1543416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15442ee70a88SLois Curfman McInnes if (oldmat->colmap) { 15450452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1546416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1547416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1548416022c9SBarry Smith } else a->colmap = 0; 1549ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 15500452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1551464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1552416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1553416022c9SBarry Smith } else a->garray = 0; 1554d6dfbf8fSBarry Smith 1555416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1556416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1557a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1558416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1559416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1560416022c9SBarry Smith PLogObjectParent(mat,a->A); 1561416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1562416022c9SBarry Smith PLogObjectParent(mat,a->B); 15635dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 156425cdf11fSBarry Smith if (flg) { 1565682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1566682d7d0cSBarry Smith } 15678a729477SBarry Smith *newmat = mat; 15688a729477SBarry Smith return 0; 15698a729477SBarry Smith } 1570416022c9SBarry Smith 1571416022c9SBarry Smith #include "sysio.h" 1572416022c9SBarry Smith 1573c456f294SBarry Smith int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1574416022c9SBarry Smith { 1575d65a2f8fSBarry Smith Mat A; 1576416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1577d65a2f8fSBarry Smith Scalar *vals,*svals; 1578416022c9SBarry Smith PetscObject vobj = (PetscObject) bview; 1579416022c9SBarry Smith MPI_Comm comm = vobj->comm; 1580416022c9SBarry Smith MPI_Status status; 158117699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1582d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1583d65a2f8fSBarry Smith int tag = ((PetscObject)bview)->tag; 1584416022c9SBarry Smith 158517699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 158617699dbbSLois Curfman McInnes if (!rank) { 1587416022c9SBarry Smith ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1588416022c9SBarry Smith ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1589c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1590416022c9SBarry Smith } 1591416022c9SBarry Smith 1592416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1593416022c9SBarry Smith M = header[1]; N = header[2]; 1594416022c9SBarry Smith /* determine ownership of all rows */ 159517699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 15960452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1597416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1598416022c9SBarry Smith rowners[0] = 0; 159917699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1600416022c9SBarry Smith rowners[i] += rowners[i-1]; 1601416022c9SBarry Smith } 160217699dbbSLois Curfman McInnes rstart = rowners[rank]; 160317699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1604416022c9SBarry Smith 1605416022c9SBarry Smith /* distribute row lengths to all processors */ 16060452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1607416022c9SBarry Smith offlens = ourlens + (rend-rstart); 160817699dbbSLois Curfman McInnes if (!rank) { 16090452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1610416022c9SBarry Smith ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 16110452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 161217699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1613d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16140452661fSBarry Smith PetscFree(sndcounts); 1615416022c9SBarry Smith } 1616416022c9SBarry Smith else { 1617416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1618416022c9SBarry Smith } 1619416022c9SBarry Smith 162017699dbbSLois Curfman McInnes if (!rank) { 1621416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 16220452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1623cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 162417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1625416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1626416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1627416022c9SBarry Smith } 1628416022c9SBarry Smith } 16290452661fSBarry Smith PetscFree(rowlengths); 1630416022c9SBarry Smith 1631416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1632416022c9SBarry Smith maxnz = 0; 163317699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 16340452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1635416022c9SBarry Smith } 16360452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1637416022c9SBarry Smith 1638416022c9SBarry Smith /* read in my part of the matrix column indices */ 1639416022c9SBarry Smith nz = procsnz[0]; 16400452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1641d65a2f8fSBarry Smith ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1642d65a2f8fSBarry Smith 1643d65a2f8fSBarry Smith /* read in every one elses and ship off */ 164417699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1645d65a2f8fSBarry Smith nz = procsnz[i]; 1646416022c9SBarry Smith ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1647d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1648d65a2f8fSBarry Smith } 16490452661fSBarry Smith PetscFree(cols); 1650416022c9SBarry Smith } 1651416022c9SBarry Smith else { 1652416022c9SBarry Smith /* determine buffer space needed for message */ 1653416022c9SBarry Smith nz = 0; 1654416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1655416022c9SBarry Smith nz += ourlens[i]; 1656416022c9SBarry Smith } 16570452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1658416022c9SBarry Smith 1659416022c9SBarry Smith /* receive message of column indices*/ 1660d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1661416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1662c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1663416022c9SBarry Smith } 1664416022c9SBarry Smith 1665416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1666cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1667416022c9SBarry Smith jj = 0; 1668416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1669416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1670d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1671416022c9SBarry Smith jj++; 1672416022c9SBarry Smith } 1673416022c9SBarry Smith } 1674d65a2f8fSBarry Smith 1675d65a2f8fSBarry Smith /* create our matrix */ 1676416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1677416022c9SBarry Smith ourlens[i] -= offlens[i]; 1678416022c9SBarry Smith } 1679d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1680d65a2f8fSBarry Smith A = *newmat; 1681d65a2f8fSBarry Smith MatSetOption(A,COLUMNS_SORTED); 1682d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1683d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1684d65a2f8fSBarry Smith } 1685416022c9SBarry Smith 168617699dbbSLois Curfman McInnes if (!rank) { 16870452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1688416022c9SBarry Smith 1689416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1690416022c9SBarry Smith nz = procsnz[0]; 1691416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1692d65a2f8fSBarry Smith 1693d65a2f8fSBarry Smith /* insert into matrix */ 1694d65a2f8fSBarry Smith jj = rstart; 1695d65a2f8fSBarry Smith smycols = mycols; 1696d65a2f8fSBarry Smith svals = vals; 1697d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1698d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1699d65a2f8fSBarry Smith smycols += ourlens[i]; 1700d65a2f8fSBarry Smith svals += ourlens[i]; 1701d65a2f8fSBarry Smith jj++; 1702416022c9SBarry Smith } 1703416022c9SBarry Smith 1704d65a2f8fSBarry Smith /* read in other processors and ship out */ 170517699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1706416022c9SBarry Smith nz = procsnz[i]; 1707416022c9SBarry Smith ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1708d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1709416022c9SBarry Smith } 17100452661fSBarry Smith PetscFree(procsnz); 1711416022c9SBarry Smith } 1712d65a2f8fSBarry Smith else { 1713d65a2f8fSBarry Smith /* receive numeric values */ 17140452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1715416022c9SBarry Smith 1716d65a2f8fSBarry Smith /* receive message of values*/ 1717d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1718d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1719c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1720d65a2f8fSBarry Smith 1721d65a2f8fSBarry Smith /* insert into matrix */ 1722d65a2f8fSBarry Smith jj = rstart; 1723d65a2f8fSBarry Smith smycols = mycols; 1724d65a2f8fSBarry Smith svals = vals; 1725d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1726d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1727d65a2f8fSBarry Smith smycols += ourlens[i]; 1728d65a2f8fSBarry Smith svals += ourlens[i]; 1729d65a2f8fSBarry Smith jj++; 1730d65a2f8fSBarry Smith } 1731d65a2f8fSBarry Smith } 17320452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1733d65a2f8fSBarry Smith 1734d65a2f8fSBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1735d65a2f8fSBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1736416022c9SBarry Smith return 0; 1737416022c9SBarry Smith } 1738