179bdfe76SSatish Balay #ifndef lint 2*57b952d6SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.2 1996/06/04 21:40:25 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 8*57b952d6SSatish Balay #include "draw.h" 9*57b952d6SSatish Balay #include "pinclude/pviewer.h" 10*57b952d6SSatish Balay 11*57b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 12*57b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13*57b952d6SSatish Balay 14*57b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 15*57b952d6SSatish Balay number to the local number in the off-diagonal part of the local 16*57b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 17*57b952d6SSatish Balay length of colmap equals the global matrix length. 18*57b952d6SSatish Balay */ 19*57b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 20*57b952d6SSatish Balay { 21*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22*57b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 23*57b952d6SSatish Balay int nbs = B->nbs,i; 24*57b952d6SSatish Balay 25*57b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 26*57b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 27*57b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 28*57b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 29*57b952d6SSatish Balay return 0; 30*57b952d6SSatish Balay } 31*57b952d6SSatish Balay 32*57b952d6SSatish Balay 33*57b952d6SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 34*57b952d6SSatish Balay { 35*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 36*57b952d6SSatish Balay int ierr; 37*57b952d6SSatish Balay if (baij->size == 1) { 38*57b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 39*57b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 40*57b952d6SSatish Balay return 0; 41*57b952d6SSatish Balay } 42*57b952d6SSatish Balay 43*57b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 44*57b952d6SSatish Balay { 45*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 46*57b952d6SSatish Balay Scalar value; 47*57b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 48*57b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 49*57b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 50*57b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 51*57b952d6SSatish Balay 52*57b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 53*57b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 54*57b952d6SSatish Balay } 55*57b952d6SSatish Balay baij->insertmode = addv; 56*57b952d6SSatish Balay rstart_orig = rstart*bs; 57*57b952d6SSatish Balay rend_orig = rend*bs; 58*57b952d6SSatish Balay cstart_orig = cstart*bs; 59*57b952d6SSatish Balay cend_orig = cend*bs; 60*57b952d6SSatish Balay for ( i=0; i<m; i++ ) { 61*57b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 62*57b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 63*57b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 64*57b952d6SSatish Balay row = im[i] - rstart_orig; 65*57b952d6SSatish Balay for ( j=0; j<n; j++ ) { 66*57b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 67*57b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 68*57b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 69*57b952d6SSatish Balay col = in[j] - cstart_orig; 70*57b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 71*57b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 72*57b952d6SSatish Balay } 73*57b952d6SSatish Balay else { 74*57b952d6SSatish Balay if (mat->was_assembled) { 75*57b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 76*57b952d6SSatish Balay col = baij->colmap[in[j]]; 77*57b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 78*57b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 79*57b952d6SSatish Balay col = in[j]; 80*57b952d6SSatish Balay } 81*57b952d6SSatish Balay } 82*57b952d6SSatish Balay else col = in[j]; 83*57b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 84*57b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 85*57b952d6SSatish Balay } 86*57b952d6SSatish Balay } 87*57b952d6SSatish Balay } 88*57b952d6SSatish Balay else { 89*57b952d6SSatish Balay if (roworiented) { 90*57b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 91*57b952d6SSatish Balay } 92*57b952d6SSatish Balay else { 93*57b952d6SSatish Balay row = im[i]; 94*57b952d6SSatish Balay for ( j=0; j<n; j++ ) { 95*57b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 96*57b952d6SSatish Balay } 97*57b952d6SSatish Balay } 98*57b952d6SSatish Balay } 99*57b952d6SSatish Balay } 100*57b952d6SSatish Balay return 0; 101*57b952d6SSatish Balay } 102*57b952d6SSatish Balay 103*57b952d6SSatish Balay 104*57b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 105*57b952d6SSatish Balay { 106*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 107*57b952d6SSatish Balay MPI_Comm comm = mat->comm; 108*57b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 109*57b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 110*57b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 111*57b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 112*57b952d6SSatish Balay InsertMode addv; 113*57b952d6SSatish Balay Scalar *rvalues,*svalues; 114*57b952d6SSatish Balay 115*57b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 116*57b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 117*57b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 118*57b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 119*57b952d6SSatish Balay } 120*57b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 121*57b952d6SSatish Balay 122*57b952d6SSatish Balay /* first count number of contributors to each processor */ 123*57b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 124*57b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 125*57b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 126*57b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 127*57b952d6SSatish Balay idx = baij->stash.idx[i]; 128*57b952d6SSatish Balay for ( j=0; j<size; j++ ) { 129*57b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 130*57b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 131*57b952d6SSatish Balay } 132*57b952d6SSatish Balay } 133*57b952d6SSatish Balay } 134*57b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 135*57b952d6SSatish Balay 136*57b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 137*57b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 138*57b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 139*57b952d6SSatish Balay nreceives = work[rank]; 140*57b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 141*57b952d6SSatish Balay nmax = work[rank]; 142*57b952d6SSatish Balay PetscFree(work); 143*57b952d6SSatish Balay 144*57b952d6SSatish Balay /* post receives: 145*57b952d6SSatish Balay 1) each message will consist of ordered pairs 146*57b952d6SSatish Balay (global index,value) we store the global index as a double 147*57b952d6SSatish Balay to simplify the message passing. 148*57b952d6SSatish Balay 2) since we don't know how long each individual message is we 149*57b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 150*57b952d6SSatish Balay this is a lot of wasted space. 151*57b952d6SSatish Balay 152*57b952d6SSatish Balay 153*57b952d6SSatish Balay This could be done better. 154*57b952d6SSatish Balay */ 155*57b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 156*57b952d6SSatish Balay CHKPTRQ(rvalues); 157*57b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 158*57b952d6SSatish Balay CHKPTRQ(recv_waits); 159*57b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 160*57b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 161*57b952d6SSatish Balay comm,recv_waits+i); 162*57b952d6SSatish Balay } 163*57b952d6SSatish Balay 164*57b952d6SSatish Balay /* do sends: 165*57b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 166*57b952d6SSatish Balay the ith processor 167*57b952d6SSatish Balay */ 168*57b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 169*57b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 170*57b952d6SSatish Balay CHKPTRQ(send_waits); 171*57b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 172*57b952d6SSatish Balay starts[0] = 0; 173*57b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 174*57b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 175*57b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 176*57b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 177*57b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 178*57b952d6SSatish Balay } 179*57b952d6SSatish Balay PetscFree(owner); 180*57b952d6SSatish Balay starts[0] = 0; 181*57b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 182*57b952d6SSatish Balay count = 0; 183*57b952d6SSatish Balay for ( i=0; i<size; i++ ) { 184*57b952d6SSatish Balay if (procs[i]) { 185*57b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 186*57b952d6SSatish Balay comm,send_waits+count++); 187*57b952d6SSatish Balay } 188*57b952d6SSatish Balay } 189*57b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 190*57b952d6SSatish Balay 191*57b952d6SSatish Balay /* Free cache space */ 192*57b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 193*57b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 194*57b952d6SSatish Balay 195*57b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 196*57b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 197*57b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 198*57b952d6SSatish Balay baij->rmax = nmax; 199*57b952d6SSatish Balay 200*57b952d6SSatish Balay return 0; 201*57b952d6SSatish Balay } 202*57b952d6SSatish Balay 203*57b952d6SSatish Balay 204*57b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 205*57b952d6SSatish Balay { 206*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 207*57b952d6SSatish Balay MPI_Status *send_status,recv_status; 208*57b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 209*57b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 210*57b952d6SSatish Balay Scalar *values,val; 211*57b952d6SSatish Balay InsertMode addv = baij->insertmode; 212*57b952d6SSatish Balay 213*57b952d6SSatish Balay /* wait on receives */ 214*57b952d6SSatish Balay while (count) { 215*57b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 216*57b952d6SSatish Balay /* unpack receives into our local space */ 217*57b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 218*57b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 219*57b952d6SSatish Balay n = n/3; 220*57b952d6SSatish Balay for ( i=0; i<n; i++ ) { 221*57b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 222*57b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 223*57b952d6SSatish Balay val = values[3*i+2]; 224*57b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 225*57b952d6SSatish Balay col -= baij->cstart*bs; 226*57b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 227*57b952d6SSatish Balay } 228*57b952d6SSatish Balay else { 229*57b952d6SSatish Balay if (mat->was_assembled) { 230*57b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 231*57b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 232*57b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 233*57b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 234*57b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 235*57b952d6SSatish Balay } 236*57b952d6SSatish Balay } 237*57b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 238*57b952d6SSatish Balay } 239*57b952d6SSatish Balay } 240*57b952d6SSatish Balay count--; 241*57b952d6SSatish Balay } 242*57b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 243*57b952d6SSatish Balay 244*57b952d6SSatish Balay /* wait on sends */ 245*57b952d6SSatish Balay if (baij->nsends) { 246*57b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 247*57b952d6SSatish Balay CHKPTRQ(send_status); 248*57b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 249*57b952d6SSatish Balay PetscFree(send_status); 250*57b952d6SSatish Balay } 251*57b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 252*57b952d6SSatish Balay 253*57b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 254*57b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 255*57b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 256*57b952d6SSatish Balay 257*57b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 258*57b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 259*57b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 260*57b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 261*57b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 262*57b952d6SSatish Balay } 263*57b952d6SSatish Balay 264*57b952d6SSatish Balay if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 265*57b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 266*57b952d6SSatish Balay } 267*57b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 268*57b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 269*57b952d6SSatish Balay 270*57b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 271*57b952d6SSatish Balay return 0; 272*57b952d6SSatish Balay } 273*57b952d6SSatish Balay 274*57b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 275*57b952d6SSatish Balay { 276*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277*57b952d6SSatish Balay int ierr; 278*57b952d6SSatish Balay 279*57b952d6SSatish Balay if (baij->size == 1) { 280*57b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 281*57b952d6SSatish Balay } 282*57b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 283*57b952d6SSatish Balay return 0; 284*57b952d6SSatish Balay } 285*57b952d6SSatish Balay 286*57b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 287*57b952d6SSatish Balay { 288*57b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 289*57b952d6SSatish Balay int ierr, format,rank,bs=baij->bs,bs2=baij->bs2; 290*57b952d6SSatish Balay FILE *fd; 291*57b952d6SSatish Balay ViewerType vtype; 292*57b952d6SSatish Balay 293*57b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 294*57b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 295*57b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 296*57b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 297*57b952d6SSatish Balay int nz, nzalloc, mem; 298*57b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 299*57b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 300*57b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 301*57b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 302*57b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 303*57b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 304*57b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 305*57b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 306*57b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 307*57b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 308*57b952d6SSatish Balay fflush(fd); 309*57b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 310*57b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 311*57b952d6SSatish Balay return 0; 312*57b952d6SSatish Balay } 313*57b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 314*57b952d6SSatish Balay return 0; 315*57b952d6SSatish Balay } 316*57b952d6SSatish Balay } 317*57b952d6SSatish Balay 318*57b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 319*57b952d6SSatish Balay Draw draw; 320*57b952d6SSatish Balay PetscTruth isnull; 321*57b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 322*57b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 323*57b952d6SSatish Balay } 324*57b952d6SSatish Balay 325*57b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 326*57b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 327*57b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 328*57b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 329*57b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 330*57b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 331*57b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 332*57b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 333*57b952d6SSatish Balay fflush(fd); 334*57b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 335*57b952d6SSatish Balay } 336*57b952d6SSatish Balay else { 337*57b952d6SSatish Balay int size = baij->size; 338*57b952d6SSatish Balay rank = baij->rank; 339*57b952d6SSatish Balay if (size == 1) { 340*57b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 341*57b952d6SSatish Balay } 342*57b952d6SSatish Balay else { 343*57b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 344*57b952d6SSatish Balay Mat A; 345*57b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 346*57b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 347*57b952d6SSatish Balay int mbs=baij->mbs; 348*57b952d6SSatish Balay Scalar *a; 349*57b952d6SSatish Balay 350*57b952d6SSatish Balay if (!rank) { 351*57b952d6SSatish Balay ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 352*57b952d6SSatish Balay CHKERRQ(ierr); 353*57b952d6SSatish Balay } 354*57b952d6SSatish Balay else { 355*57b952d6SSatish Balay ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 356*57b952d6SSatish Balay CHKERRQ(ierr); 357*57b952d6SSatish Balay } 358*57b952d6SSatish Balay PLogObjectParent(mat,A); 359*57b952d6SSatish Balay 360*57b952d6SSatish Balay /* copy over the A part */ 361*57b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 362*57b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 363*57b952d6SSatish Balay row = baij->rstart; 364*57b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 365*57b952d6SSatish Balay 366*57b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 367*57b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 368*57b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 369*57b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 370*57b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 371*57b952d6SSatish Balay for (k=0; k<bs; k++ ) { 372*57b952d6SSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 373*57b952d6SSatish Balay col++; 374*57b952d6SSatish Balay } 375*57b952d6SSatish Balay } 376*57b952d6SSatish Balay } 377*57b952d6SSatish Balay /* copy over the B part */ 378*57b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 379*57b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 380*57b952d6SSatish Balay row = baij->rstart*bs; 381*57b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 382*57b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 383*57b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 384*57b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 385*57b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 386*57b952d6SSatish Balay for (k=0; k<bs; k++ ) { 387*57b952d6SSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 388*57b952d6SSatish Balay col++; 389*57b952d6SSatish Balay } 390*57b952d6SSatish Balay } 391*57b952d6SSatish Balay } 392*57b952d6SSatish Balay PetscFree(rvals); 393*57b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 394*57b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 395*57b952d6SSatish Balay if (!rank) { 396*57b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 397*57b952d6SSatish Balay } 398*57b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 399*57b952d6SSatish Balay } 400*57b952d6SSatish Balay } 401*57b952d6SSatish Balay return 0; 402*57b952d6SSatish Balay } 403*57b952d6SSatish Balay 404*57b952d6SSatish Balay 405*57b952d6SSatish Balay 406*57b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 407*57b952d6SSatish Balay { 408*57b952d6SSatish Balay Mat mat = (Mat) obj; 409*57b952d6SSatish Balay int ierr; 410*57b952d6SSatish Balay ViewerType vtype; 411*57b952d6SSatish Balay 412*57b952d6SSatish Balay if (!viewer) { 413*57b952d6SSatish Balay viewer = STDOUT_VIEWER_SELF; 414*57b952d6SSatish Balay } 415*57b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 416*57b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 417*57b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 418*57b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 419*57b952d6SSatish Balay } 420*57b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 421*57b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 422*57b952d6SSatish Balay } 423*57b952d6SSatish Balay return 0; 424*57b952d6SSatish Balay } 425*57b952d6SSatish Balay 42679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 42779bdfe76SSatish Balay { 42879bdfe76SSatish Balay Mat mat = (Mat) obj; 42979bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 43079bdfe76SSatish Balay int ierr; 43179bdfe76SSatish Balay 43279bdfe76SSatish Balay #if defined(PETSC_LOG) 43379bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 43479bdfe76SSatish Balay #endif 43579bdfe76SSatish Balay 43679bdfe76SSatish Balay PetscFree(baij->rowners); 43779bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 43879bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 43979bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 44079bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 44179bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 44279bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 44379bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 44479bdfe76SSatish Balay PetscFree(baij); 44579bdfe76SSatish Balay PLogObjectDestroy(mat); 44679bdfe76SSatish Balay PetscHeaderDestroy(mat); 44779bdfe76SSatish Balay return 0; 44879bdfe76SSatish Balay } 44979bdfe76SSatish Balay 450*57b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 451*57b952d6SSatish Balay { 452*57b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 453*57b952d6SSatish Balay *m = mat->M; *n = mat->N; 454*57b952d6SSatish Balay return 0; 455*57b952d6SSatish Balay } 456*57b952d6SSatish Balay 457*57b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 458*57b952d6SSatish Balay { 459*57b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 460*57b952d6SSatish Balay *m = mat->m; *n = mat->N; 461*57b952d6SSatish Balay return 0; 462*57b952d6SSatish Balay } 463*57b952d6SSatish Balay 464*57b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 465*57b952d6SSatish Balay { 466*57b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 467*57b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 468*57b952d6SSatish Balay return 0; 469*57b952d6SSatish Balay } 470*57b952d6SSatish Balay 47179bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 47279bdfe76SSatish Balay static struct _MatOps MatOps = { 473*57b952d6SSatish Balay MatSetValues_MPIBAIJ,0,0,0, 47479bdfe76SSatish Balay 0,0,0,0, 47579bdfe76SSatish Balay 0,0,0,0, 47679bdfe76SSatish Balay 0,0,0,0, 47779bdfe76SSatish Balay 0,0,0,0, 478*57b952d6SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 479*57b952d6SSatish Balay 0,0,MatGetReordering_MPIBAIJ,0, 480*57b952d6SSatish Balay 0,0,0,MatGetSize_MPIBAIJ, 481*57b952d6SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 48279bdfe76SSatish Balay 0,0,0,0, 48379bdfe76SSatish Balay 0,0,0,0, 48479bdfe76SSatish Balay 0,0,0,0, 48579bdfe76SSatish Balay 0,0,0,0, 48679bdfe76SSatish Balay 0}; 48779bdfe76SSatish Balay 48879bdfe76SSatish Balay 48979bdfe76SSatish Balay /*@C 49079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 49179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 49279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 49379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 49479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 49579bdfe76SSatish Balay 49679bdfe76SSatish Balay Input Parameters: 49779bdfe76SSatish Balay . comm - MPI communicator 49879bdfe76SSatish Balay . bs - size of blockk 49979bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 50079bdfe76SSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated 50179bdfe76SSatish Balay if N is given) 50279bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 50379bdfe76SSatish Balay . N - number of global columns (or PETSC_DECIDE to have calculated 50479bdfe76SSatish Balay if n is given) 50579bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 50679bdfe76SSatish Balay submatrix (same for all local rows) 50779bdfe76SSatish Balay . d_nzz - number of block nonzeros per block row in diagonal portion of local 50879bdfe76SSatish Balay submatrix or null (possibly different for each row). You must leave 50979bdfe76SSatish Balay room for the diagonal entry even if it is zero. 51079bdfe76SSatish Balay . o_nz - number of block nonzeros per block row in off-diagonal portion of local 51179bdfe76SSatish Balay submatrix (same for all local rows). 51279bdfe76SSatish Balay . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 51379bdfe76SSatish Balay submatrix or null (possibly different for each row). 51479bdfe76SSatish Balay 51579bdfe76SSatish Balay Output Parameter: 51679bdfe76SSatish Balay . A - the matrix 51779bdfe76SSatish Balay 51879bdfe76SSatish Balay Notes: 51979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 52079bdfe76SSatish Balay (possibly both). 52179bdfe76SSatish Balay 52279bdfe76SSatish Balay Storage Information: 52379bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 52479bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 52579bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 52679bdfe76SSatish Balay local matrix (a rectangular submatrix). 52779bdfe76SSatish Balay 52879bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 52979bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 53079bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 53179bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 53279bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 53379bdfe76SSatish Balay 53479bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 53579bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 53679bdfe76SSatish Balay 53779bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 53879bdfe76SSatish Balay $ ------------------- 53979bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 54079bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 54179bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 54279bdfe76SSatish Balay $ ------------------- 54379bdfe76SSatish Balay $ 54479bdfe76SSatish Balay 54579bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 54679bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 54779bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 548*57b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 54979bdfe76SSatish Balay 55079bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 55179bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 55279bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 55379bdfe76SSatish Balay one expects d_nz >> o_nz. For additional details, see the users manual 55479bdfe76SSatish Balay chapter on matrices and the file $(PETSC_DIR)/Performance. 55579bdfe76SSatish Balay 55679bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel 55779bdfe76SSatish Balay 55879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 55979bdfe76SSatish Balay @*/ 56079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 56179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 56279bdfe76SSatish Balay { 56379bdfe76SSatish Balay Mat B; 56479bdfe76SSatish Balay Mat_MPIBAIJ *b; 56579bdfe76SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs,Nbs; 56679bdfe76SSatish Balay 56779bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 56879bdfe76SSatish Balay *A = 0; 56979bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 57079bdfe76SSatish Balay PLogObjectCreate(B); 57179bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 57279bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 57379bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 57479bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 57579bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 576*57b952d6SSatish Balay 57779bdfe76SSatish Balay B->factor = 0; 57879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 57979bdfe76SSatish Balay 58079bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 58179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 58279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 58379bdfe76SSatish Balay 58479bdfe76SSatish Balay if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 585*57b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 58679bdfe76SSatish Balay 58779bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 58879bdfe76SSatish Balay work[0] = m; work[1] = n; 58979bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 59079bdfe76SSatish Balay if (mbs*bs != m || nbs*n != nbs) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 59179bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 59279bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 59379bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 59479bdfe76SSatish Balay } 59579bdfe76SSatish Balay if (m == PETSC_DECIDE) { 59679bdfe76SSatish Balay Mbs = M/bs; 59779bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 59879bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 59979bdfe76SSatish Balay m = mbs*bs; 60079bdfe76SSatish Balay } 60179bdfe76SSatish Balay if (n == PETSC_DECIDE) { 60279bdfe76SSatish Balay Nbs = N/bs; 60379bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 60479bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 60579bdfe76SSatish Balay n = nbs*bs; 60679bdfe76SSatish Balay } 60779bdfe76SSatish Balay 60879bdfe76SSatish Balay b->m = m; B->m = m; 60979bdfe76SSatish Balay b->n = n; B->n = n; 61079bdfe76SSatish Balay b->N = N; B->N = N; 61179bdfe76SSatish Balay b->M = M; B->M = M; 61279bdfe76SSatish Balay b->bs = bs; 61379bdfe76SSatish Balay b->bs2 = bs*bs; 61479bdfe76SSatish Balay b->mbs = mbs; 61579bdfe76SSatish Balay b->nbs = nbs; 61679bdfe76SSatish Balay b->Mbs = Mbs; 61779bdfe76SSatish Balay b->Nbs = Nbs; 61879bdfe76SSatish Balay 61979bdfe76SSatish Balay /* build local table of row and column ownerships */ 62079bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 62179bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 62279bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 62379bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 62479bdfe76SSatish Balay b->rowners[0] = 0; 62579bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 62679bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 62779bdfe76SSatish Balay } 62879bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 62979bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 63079bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 63179bdfe76SSatish Balay b->cowners[0] = 0; 63279bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 63379bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 63479bdfe76SSatish Balay } 63579bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 63679bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 63779bdfe76SSatish Balay 63879bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 63979bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 64079bdfe76SSatish Balay PLogObjectParent(B,b->A); 64179bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 64279bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 64379bdfe76SSatish Balay PLogObjectParent(B,b->B); 64479bdfe76SSatish Balay 64579bdfe76SSatish Balay /* build cache for off array entries formed */ 64679bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 64779bdfe76SSatish Balay b->colmap = 0; 64879bdfe76SSatish Balay b->garray = 0; 64979bdfe76SSatish Balay b->roworiented = 1; 65079bdfe76SSatish Balay 65179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 65279bdfe76SSatish Balay b->lvec = 0; 65379bdfe76SSatish Balay b->Mvctx = 0; 65479bdfe76SSatish Balay 65579bdfe76SSatish Balay /* stuff for MatGetRow() */ 65679bdfe76SSatish Balay b->rowindices = 0; 65779bdfe76SSatish Balay b->rowvalues = 0; 65879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 65979bdfe76SSatish Balay 66079bdfe76SSatish Balay *A = B; 66179bdfe76SSatish Balay return 0; 66279bdfe76SSatish Balay } 663*57b952d6SSatish Balay 664*57b952d6SSatish Balay #include "sys.h" 665*57b952d6SSatish Balay 666*57b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 667*57b952d6SSatish Balay { 668*57b952d6SSatish Balay Mat A; 669*57b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 670*57b952d6SSatish Balay Scalar *vals,*buf; 671*57b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 672*57b952d6SSatish Balay MPI_Status status; 673*57b952d6SSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 674*57b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 675*57b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 676*57b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 677*57b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 678*57b952d6SSatish Balay 679*57b952d6SSatish Balay 680*57b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 681*57b952d6SSatish Balay bs2 = bs*bs; 682*57b952d6SSatish Balay 683*57b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 684*57b952d6SSatish Balay if (!rank) { 685*57b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 686*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 687*57b952d6SSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 688*57b952d6SSatish Balay } 689*57b952d6SSatish Balay 690*57b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 691*57b952d6SSatish Balay M = header[1]; N = header[2]; 692*57b952d6SSatish Balay 693*57b952d6SSatish Balay 694*57b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 695*57b952d6SSatish Balay 696*57b952d6SSatish Balay /* 697*57b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 698*57b952d6SSatish Balay divisible by the blocksize 699*57b952d6SSatish Balay */ 700*57b952d6SSatish Balay Mbs = M/bs; 701*57b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 702*57b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 703*57b952d6SSatish Balay else Mbs++; 704*57b952d6SSatish Balay if (extra_rows &&!rank) { 705*57b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 706*57b952d6SSatish Balay } 707*57b952d6SSatish Balay /* determine ownership of all rows */ 708*57b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 709*57b952d6SSatish Balay m = mbs * bs; 710*57b952d6SSatish Balay rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 711*57b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 712*57b952d6SSatish Balay rowners[0] = 0; 713*57b952d6SSatish Balay for ( i=2; i<=size; i++ ) { 714*57b952d6SSatish Balay rowners[i] += rowners[i-1]; 715*57b952d6SSatish Balay } 716*57b952d6SSatish Balay rstart = rowners[rank]; 717*57b952d6SSatish Balay rend = rowners[rank+1]; 718*57b952d6SSatish Balay 719*57b952d6SSatish Balay /* distribute row lengths to all processors */ 720*57b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 721*57b952d6SSatish Balay if (!rank) { 722*57b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 723*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 724*57b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 725*57b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 726*57b952d6SSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = (rowners[i+1] - rowners[i])*bs; 727*57b952d6SSatish Balay MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 728*57b952d6SSatish Balay PetscFree(sndcounts); 729*57b952d6SSatish Balay } 730*57b952d6SSatish Balay else { 731*57b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 732*57b952d6SSatish Balay } 733*57b952d6SSatish Balay 734*57b952d6SSatish Balay if (!rank) { 735*57b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 736*57b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 737*57b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 738*57b952d6SSatish Balay for ( i=0; i<size; i++ ) { 739*57b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 740*57b952d6SSatish Balay procsnz[i] += rowlengths[j]; 741*57b952d6SSatish Balay } 742*57b952d6SSatish Balay } 743*57b952d6SSatish Balay PetscFree(rowlengths); 744*57b952d6SSatish Balay 745*57b952d6SSatish Balay /* determine max buffer needed and allocate it */ 746*57b952d6SSatish Balay maxnz = 0; 747*57b952d6SSatish Balay for ( i=0; i<size; i++ ) { 748*57b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 749*57b952d6SSatish Balay } 750*57b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 751*57b952d6SSatish Balay 752*57b952d6SSatish Balay /* read in my part of the matrix column indices */ 753*57b952d6SSatish Balay nz = procsnz[0]; 754*57b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 755*57b952d6SSatish Balay mycols = ibuf; 756*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 757*57b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 758*57b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 759*57b952d6SSatish Balay nz = procsnz[i]; 760*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 761*57b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 762*57b952d6SSatish Balay } 763*57b952d6SSatish Balay /* read in the stuff for the last proc */ 764*57b952d6SSatish Balay if ( size != 1 ) { 765*57b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 766*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 767*57b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 768*57b952d6SSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,i,tag,comm); 769*57b952d6SSatish Balay } 770*57b952d6SSatish Balay PetscFree(cols); 771*57b952d6SSatish Balay } 772*57b952d6SSatish Balay else { 773*57b952d6SSatish Balay /* determine buffer space needed for message */ 774*57b952d6SSatish Balay nz = 0; 775*57b952d6SSatish Balay for ( i=0; i<m; i++ ) { 776*57b952d6SSatish Balay nz += locrowlens[i]; 777*57b952d6SSatish Balay } 778*57b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 779*57b952d6SSatish Balay mycols = ibuf; 780*57b952d6SSatish Balay /* receive message of column indices*/ 781*57b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 782*57b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 783*57b952d6SSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 784*57b952d6SSatish Balay } 785*57b952d6SSatish Balay 786*57b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 787*57b952d6SSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(dlens); 788*57b952d6SSatish Balay odlens = dlens + (rstart-rend); 789*57b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 790*57b952d6SSatish Balay PetscMemzero(mask,Mbs*sizeof(int)); 791*57b952d6SSatish Balay masked1 = mask + Mbs; 792*57b952d6SSatish Balay masked2 = masked1 + Mbs; 793*57b952d6SSatish Balay rowcount = 0; nzcount = 0; 794*57b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 795*57b952d6SSatish Balay dcount = 0; 796*57b952d6SSatish Balay odcount = 0; 797*57b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 798*57b952d6SSatish Balay kmax = locrowlens[rowcount]; 799*57b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 800*57b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 801*57b952d6SSatish Balay if (!mask[tmp]) { 802*57b952d6SSatish Balay mask[tmp] = 1; 803*57b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 804*57b952d6SSatish Balay else masked1[dcount++] = tmp; 805*57b952d6SSatish Balay } 806*57b952d6SSatish Balay } 807*57b952d6SSatish Balay rowcount++; 808*57b952d6SSatish Balay } 809*57b952d6SSatish Balay dlens[i] = dcount; 810*57b952d6SSatish Balay odlens[i] = odcount; 811*57b952d6SSatish Balay /* zero out the mask elements we set */ 812*57b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 813*57b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 814*57b952d6SSatish Balay } 815*57b952d6SSatish Balay /* create our matrix */ 816*57b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 817*57b952d6SSatish Balay A = *newmat; 818*57b952d6SSatish Balay MatSetOption(A,COLUMNS_SORTED); 819*57b952d6SSatish Balay 820*57b952d6SSatish Balay if (!rank) { 821*57b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 822*57b952d6SSatish Balay 823*57b952d6SSatish Balay /* read in my part of the matrix numerical values */ 824*57b952d6SSatish Balay nz = procsnz[0]; 825*57b952d6SSatish Balay vals = buf; 826*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 827*57b952d6SSatish Balay 828*57b952d6SSatish Balay /* insert into matrix */ 829*57b952d6SSatish Balay jj = rstart*bs; 830*57b952d6SSatish Balay for ( i=0; i<m; i++ ) { 831*57b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 832*57b952d6SSatish Balay mycols += locrowlens[i]; 833*57b952d6SSatish Balay vals += locrowlens[i]; 834*57b952d6SSatish Balay jj++; 835*57b952d6SSatish Balay } 836*57b952d6SSatish Balay 837*57b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 838*57b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 839*57b952d6SSatish Balay nz = procsnz[i]; 840*57b952d6SSatish Balay vals = buf; 841*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 842*57b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 843*57b952d6SSatish Balay } 844*57b952d6SSatish Balay /* the last proc */ 845*57b952d6SSatish Balay if ( size != 1 ){ 846*57b952d6SSatish Balay nz = procsnz[i] - extra_rows; 847*57b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 848*57b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 849*57b952d6SSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,i,A->tag,comm); 850*57b952d6SSatish Balay } 851*57b952d6SSatish Balay PetscFree(procsnz); 852*57b952d6SSatish Balay } 853*57b952d6SSatish Balay else { 854*57b952d6SSatish Balay /* receive numeric values */ 855*57b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 856*57b952d6SSatish Balay 857*57b952d6SSatish Balay /* receive message of values*/ 858*57b952d6SSatish Balay vals = buf; 859*57b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 860*57b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 861*57b952d6SSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 862*57b952d6SSatish Balay 863*57b952d6SSatish Balay /* insert into matrix */ 864*57b952d6SSatish Balay jj = rstart*bs; 865*57b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 866*57b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 867*57b952d6SSatish Balay mycols += locrowlens[i]; 868*57b952d6SSatish Balay vals += locrowlens[i]; 869*57b952d6SSatish Balay jj++; 870*57b952d6SSatish Balay } 871*57b952d6SSatish Balay } 872*57b952d6SSatish Balay PetscFree(locrowlens); 873*57b952d6SSatish Balay PetscFree(buf); 874*57b952d6SSatish Balay PetscFree(ibuf); 875*57b952d6SSatish Balay PetscFree(rowners); 876*57b952d6SSatish Balay PetscFree(dlens); 877*57b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 878*57b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 879*57b952d6SSatish Balay return 0; 880*57b952d6SSatish Balay } 881*57b952d6SSatish Balay 882*57b952d6SSatish Balay 883