#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
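/*
   A minimal usage sketch of the preallocation pattern recommended above; the sizes and the
   per-row nonzero estimates (5 diagonal, 2 off-diagonal) are illustrative assumptions only.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);                 <- used when comm has one process
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);    <- used when comm has several processes

   Calling both routines is harmless: whichever does not match the actual matrix type is ignored.
*/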
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
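/*
    A note on the pattern shared by the multiply routines above (a sketch, using the
    Mat_MPIAIJ field names): with x_local the locally owned entries of x and x_ghost the
    off-process entries gathered into a->lvec, each process computes

        y = a->A * x_local + a->B * x_ghost

    The VecScatterBegin()/VecScatterEnd() pair that fills a->lvec is split around the
    diagonal-block product so that communication overlaps with local computation, as in
    MatMult_MPIAIJ():

        VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
        (*a->A->ops->mult)(a->A,xx,yy);
        VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
        (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
*/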
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd; 1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1295 PetscScalar *column_values; 1296 PetscInt message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* )th processor needs space a largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1521 1522 /* copy over the A part */ 1523 Aloc = (Mat_SeqAIJ*)aij->A->data; 1524 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1525 row = mat->rmap->rstart; 1526 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1527 for (i=0; i<m; i++) { 1528 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1529 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1530 } 1531 aj = Aloc->j; 1532 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1533 1534 /* copy over the B part */ 1535 Aloc = (Mat_SeqAIJ*)aij->B->data; 1536 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1537 row = mat->rmap->rstart; 1538 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1539 ct = cols; 1540 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1541 for (i=0; i<m; i++) { 1542 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1543 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1544 } 1545 ierr = PetscFree(ct);CHKERRQ(ierr); 1546 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 /* 1549 Everyone has to call to draw the matrix since the graphics waits are 1550 synchronized across all processors that share the PetscDraw object 1551 */ 1552 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1553 if (!rank) { 1554 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1555 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1556 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1557 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1558 } 1559 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1560 ierr = MatDestroy(&A);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatView_MPIAIJ" 1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1568 { 1569 PetscErrorCode ierr; 1570 PetscBool iascii,isdraw,issocket,isbinary; 1571 1572 PetscFunctionBegin; 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1577 if (iascii || isdraw || isbinary || issocket) { 1578 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSOR_MPIAIJ" 1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1588 { 1589 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1590 PetscErrorCode ierr; 1591 Vec bb1 = 0; 1592 PetscBool hasop; 1593 1594 PetscFunctionBegin; 1595 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1596 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1597 } 1598 1599 if (flag == SOR_APPLY_UPPER) { 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1605 if (flag & SOR_ZERO_INITIAL_GUESS) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 its--; 1608 } 1609 1610 while (its--) { 1611 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 1614 /* update rhs: bb1 = bb - B*x */ 1615 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1616 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1620 } 1621 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_EISENSTAT) { 1654 Vec xx1; 1655 1656 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1658 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 if (!mat->diag) { 1662 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1663 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1664 } 1665 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1666 if (hasop) { 1667 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1668 } else { 1669 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1670 } 1671 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1672 1673 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1674 1675 /* local sweep */ 1676 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1677 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1678 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1679 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1680 1681 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatPermute_MPIAIJ" 1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1688 { 1689 MPI_Comm comm,pcomm; 1690 PetscInt first,local_size,nrows; 1691 const PetscInt *rows; 1692 PetscMPIInt size; 1693 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1698 /* make a collective version of 'rowp' */ 1699 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1700 if (pcomm==comm) { 1701 crowp = rowp; 1702 } else { 1703 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1704 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1705 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1706 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1707 } 1708 /* collect the global row permutation and invert it */ 1709 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1710 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1711 if (pcomm!=comm) { 1712 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1713 } 1714 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1715 /* get the local target indices */ 1716 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1717 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1718 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1719 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1720 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1721 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1722 /* the column permutation is so much easier; 1723 make a local version of 'colp' and invert it */ 1724 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1725 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1726 if (size==1) { 1727 lcolp = colp; 1728 } else { 1729 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1730 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1731 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1732 } 1733 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1734 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1735 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1736 if (size>1) { 1737 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1738 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1739 } 1740 /* now we just get the submatrix */ 1741 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1742 /* clean up */ 1743 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1744 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1751 { 1752 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1753 Mat A = mat->A,B = mat->B; 1754 PetscErrorCode ierr; 1755 PetscReal isend[5],irecv[5]; 1756 1757 PetscFunctionBegin; 1758 info->block_size = 1.0; 1759 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1760 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1761 isend[3] = info->memory; isend[4] = info->mallocs; 1762 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1773 info->nz_used = irecv[0]; 1774 info->nz_allocated = irecv[1]; 1775 info->nz_unneeded = irecv[2]; 1776 info->memory = irecv[3]; 1777 info->mallocs = irecv[4]; 1778 } else if (flag == MAT_GLOBAL_SUM) { 1779 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatSetOption_MPIAIJ" 1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1796 { 1797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1798 PetscErrorCode ierr; 1799 1800 PetscFunctionBegin; 1801 switch (op) { 1802 case MAT_NEW_NONZERO_LOCATIONS: 1803 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1804 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1805 case MAT_KEEP_NONZERO_PATTERN: 1806 case MAT_NEW_NONZERO_LOCATION_ERR: 1807 case MAT_USE_INODES: 1808 case MAT_IGNORE_ZERO_ENTRIES: 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 a->roworiented = flg; 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_STRUCTURALLY_SYMMETRIC: 
1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_HERMITIAN: 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 case MAT_SYMMETRY_ETERNAL: 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 default: 1846 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1847 } 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatGetRow_MPIAIJ" 1853 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1854 { 1855 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1856 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1857 PetscErrorCode ierr; 1858 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1859 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1860 PetscInt *cmap,*idx_p; 1861 1862 PetscFunctionBegin; 1863 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1864 mat->getrowactive = PETSC_TRUE; 1865 1866 if (!mat->rowvalues && (idx || v)) { 1867 /* 1868 allocate enough space to hold information from the longest row. 1869 */ 1870 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1871 PetscInt max = 1,tmp; 1872 for (i=0; i<matin->rmap->n; i++) { 1873 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1874 if (max < tmp) { max = tmp; } 1875 } 1876 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1877 } 1878 1879 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1880 lrow = row - rstart; 1881 1882 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1883 if (!v) {pvA = 0; pvB = 0;} 1884 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1885 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1886 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1887 nztot = nzA + nzB; 1888 1889 cmap = mat->garray; 1890 if (v || idx) { 1891 if (nztot) { 1892 /* Sort by increasing column numbers, assuming A and B already sorted */ 1893 PetscInt imark = -1; 1894 if (v) { 1895 *v = v_p = mat->rowvalues; 1896 for (i=0; i<nzB; i++) { 1897 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1898 else break; 1899 } 1900 imark = i; 1901 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1902 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1903 } 1904 if (idx) { 1905 *idx = idx_p = mat->rowindices; 1906 if (imark > -1) { 1907 for (i=0; i<imark; i++) { 1908 idx_p[i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 for (i=0; i<nzB; i++) { 1912 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1913 else break; 1914 } 1915 imark = i; 1916 } 1917 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1918 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1919 } 1920 } else { 1921 if (idx) *idx = 0; 1922 if (v) *v = 0; 1923 } 1924 } 1925 *nz = nztot; 1926 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1927 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1933 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1934 { 1935 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1936 1937 PetscFunctionBegin; 1938 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1939 
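/*
   MatGetRow_MPIAIJ() hands out pointers into the rowvalues/rowindices work arrays kept in the
   Mat_MPIAIJ structure (allocated on the first call and reused for every row), so restoring a
   row only has to clear the "getrowactive" flag below; nothing is freed here.
*/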
aij->getrowactive = PETSC_FALSE; 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatNorm_MPIAIJ" 1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1946 { 1947 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1948 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt i,j,cstart = mat->cmap->rstart; 1951 PetscReal sum = 0.0; 1952 MatScalar *v; 1953 1954 PetscFunctionBegin; 1955 if (aij->size == 1) { 1956 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1957 } else { 1958 if (type == NORM_FROBENIUS) { 1959 v = amat->a; 1960 for (i=0; i<amat->nz; i++) { 1961 #if defined(PETSC_USE_COMPLEX) 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 #else 1964 sum += (*v)*(*v); v++; 1965 #endif 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 #if defined(PETSC_USE_COMPLEX) 1970 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1971 #else 1972 sum += (*v)*(*v); v++; 1973 #endif 1974 } 1975 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 *norm = PetscSqrtReal(*norm); 1977 } else if (type == NORM_1) { /* max column norm */ 1978 PetscReal *tmp,*tmp2; 1979 PetscInt *jj,*garray = aij->garray; 1980 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1981 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1982 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1983 *norm = 0.0; 1984 v = amat->a; jj = amat->j; 1985 for (j=0; j<amat->nz; j++) { 1986 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1987 } 1988 v = bmat->a; jj = bmat->j; 1989 for (j=0; j<bmat->nz; j++) { 1990 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1991 } 1992 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1993 for (j=0; j<mat->cmap->N; j++) { 1994 if (tmp2[j] > *norm) *norm = tmp2[j]; 1995 } 1996 ierr = PetscFree(tmp);CHKERRQ(ierr); 1997 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1998 } else if (type == NORM_INFINITY) { /* max row norm */ 1999 PetscReal ntemp = 0.0; 2000 for (j=0; j<aij->A->rmap->n; j++) { 2001 v = amat->a + amat->i[j]; 2002 sum = 0.0; 2003 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2004 sum += PetscAbsScalar(*v); v++; 2005 } 2006 v = bmat->a + bmat->i[j]; 2007 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2008 sum += PetscAbsScalar(*v); v++; 2009 } 2010 if (sum > ntemp) ntemp = sum; 2011 } 2012 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2013 } else { 2014 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2015 } 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatTranspose_MPIAIJ" 2022 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2023 { 2024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2025 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2026 PetscErrorCode ierr; 2027 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2028 PetscInt cstart=A->cmap->rstart,ncol; 2029 Mat B; 2030 MatScalar *array; 2031 2032 PetscFunctionBegin; 2033 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2034 2035 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2036 ai = Aloc->i; aj = Aloc->j; 2037 bi = Bloc->i; bj = Bloc->j; 2038 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2039 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2040 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2041 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<ai[ma]; i++){ 2043 d_nnz[aj[i]] ++; 2044 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2045 } 2046 2047 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2048 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2049 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2050 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2051 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2052 } else { 2053 B = *matout; 2054 } 2055 2056 /* copy over the A part */ 2057 array = Aloc->a; 2058 row = A->rmap->rstart; 2059 for (i=0; i<ma; i++) { 2060 ncol = ai[i+1]-ai[i]; 2061 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2062 row++; array += ncol; aj += ncol; 2063 } 2064 aj = Aloc->j; 2065 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2066 2067 /* copy over the B part */ 2068 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2069 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2070 array = Bloc->a; 2071 row = A->rmap->rstart; 2072 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2073 cols_tmp = cols; 2074 for (i=0; i<mb; i++) { 2075 ncol = bi[i+1]-bi[i]; 2076 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2077 row++; array += ncol; cols_tmp += ncol; 2078 } 2079 ierr = PetscFree(cols);CHKERRQ(ierr); 2080 2081 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2084 *matout = B; 2085 } else { 2086 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2093 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2094 { 2095 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2096 Mat a = aij->A,b = aij->B; 2097 PetscErrorCode ierr; 2098 PetscInt s1,s2,s3; 2099 2100 PetscFunctionBegin; 2101 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2102 if (rr) { 2103 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2104 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2105 /* Overlap communication with computation. 
*/ 2106 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2107 } 2108 if (ll) { 2109 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2110 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2111 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2112 } 2113 /* scale the diagonal block */ 2114 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2115 2116 if (rr) { 2117 /* Do a scatter end and then right scale the off-diagonal block */ 2118 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2120 } 2121 2122 PetscFunctionReturn(0); 2123 } 2124 2125 #undef __FUNCT__ 2126 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2127 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2134 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2135 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2136 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIAIJ" 2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2156 Mat a,b,c,d; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; b = matA->B; 2162 c = matB->A; d = matB->B; 2163 2164 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2165 if (flg) { 2166 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2167 } 2168 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatCopy_MPIAIJ" 2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2175 { 2176 PetscErrorCode ierr; 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2179 2180 PetscFunctionBegin; 2181 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2182 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2183 /* because of the column compression in the off-processor part of the matrix a->B, 2184 the number of columns in a->B and b->B may be different, hence we cannot call 2185 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2186 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2187 then copying the submatrices */ 2188 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2189 } else { 2190 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2191 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2198 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2199 { 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatAXPY_MPIAIJ" 2209 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2210 { 2211 PetscErrorCode ierr; 2212 PetscInt i; 2213 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2214 PetscBLASInt bnz,one=1; 2215 Mat_SeqAIJ *x,*y; 2216 2217 PetscFunctionBegin; 2218 if (str == SAME_NONZERO_PATTERN) { 2219 PetscScalar alpha = a; 2220 x = (Mat_SeqAIJ *)xx->A->data; 2221 y = (Mat_SeqAIJ *)yy->A->data; 2222 bnz = PetscBLASIntCast(x->nz); 2223 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2224 x = (Mat_SeqAIJ *)xx->B->data; 2225 y = (Mat_SeqAIJ *)yy->B->data; 2226 bnz = PetscBLASIntCast(x->nz); 2227 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2228 } else if (str == SUBSET_NONZERO_PATTERN) { 2229 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2230 2231 x = (Mat_SeqAIJ *)xx->B->data; 2232 y = (Mat_SeqAIJ *)yy->B->data; 2233 if (y->xtoy && y->XtoY != xx->B) { 2234 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2235 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2236 } 2237 if (!y->xtoy) { /* get xtoy */ 2238 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2239 y->XtoY = xx->B; 2240 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2241 } 2242 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2243 } else { 2244 Mat B; 2245 PetscInt *nnz_d,*nnz_o; 2246 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2247 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2248 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2249 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2250 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,B); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatConjugate_MPIAIJ" 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 
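/*
   Note on the storage layout used throughout this file: a MATMPIAIJ matrix keeps its local rows
   in two sequential AIJ matrices, aij->A for entries whose global column lies in the local
   diagonal block [cstart,cend) and aij->B for the remaining ("off-diagonal") entries, whose
   compressed local column indices are translated back to global columns through aij->garray[].
   That is why the routines above and below always visit the A part with a cstart offset and the
   B part through garray.

   A minimal usage sketch (not part of this file; the communicator "comm" and the sizes are
   placeholders) showing how these type-specific routines are reached through the generic Mat
   interface:

      Mat       C;
      PetscReal nrm;
      ierr = MatCreate(comm,&C);CHKERRQ(ierr);
      ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ... insert values, then MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatNorm(C,NORM_1,&nrm);CHKERRQ(ierr);        dispatches to MatNorm_MPIAIJ()
      ierr = MatConjugate(C);CHKERRQ(ierr);               dispatches to MatConjugate_MPIAIJ()
      ierr = MatDestroy(&C);CHKERRQ(ierr);

   The dispatch is done through the MatOps_Values table defined near the end of this file.
*/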
#undef __FUNCT__ 2283 #define __FUNCT__ "MatRealPart_MPIAIJ" 2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2285 { 2286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2298 { 2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2304 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #ifdef PETSC_HAVE_PBGL 2309 2310 #include <boost/parallel/mpi/bsp_process_group.hpp> 2311 #include <boost/graph/distributed/ilu_default_graph.hpp> 2312 #include <boost/graph/distributed/ilu_0_block.hpp> 2313 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2314 #include <boost/graph/distributed/petsc/interface.hpp> 2315 #include <boost/multi_array.hpp> 2316 #include <boost/parallel/distributed_property_map->hpp> 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2324 { 2325 namespace petsc = boost::distributed::petsc; 2326 2327 namespace graph_dist = boost::graph::distributed; 2328 using boost::graph::distributed::ilu_default::process_group_type; 2329 using boost::graph::ilu_permuted; 2330 2331 PetscBool row_identity, col_identity; 2332 PetscContainer c; 2333 PetscInt m, n, M, N; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2338 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2339 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2340 if (!row_identity || !col_identity) { 2341 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2342 } 2343 2344 process_group_type pg; 2345 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2346 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2347 lgraph_type& level_graph = *lgraph_p; 2348 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2349 2350 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2351 ilu_permuted(level_graph); 2352 2353 /* put together the new matrix */ 2354 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2355 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2356 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2357 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const 
MatFactorInfo *info)
2372 {
2373 PetscFunctionBegin;
2374 PetscFunctionReturn(0);
2375 }
2376
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSolve_MPIAIJ"
2379 /*
2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381 */
2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383 {
2384 namespace graph_dist = boost::graph::distributed;
2385
2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2387 lgraph_type* lgraph_p;
2388 PetscContainer c;
2389 PetscErrorCode ierr;
2390
2391 PetscFunctionBegin;
2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2394 ierr = VecCopy(b, x);CHKERRQ(ierr);
2395
2396 PetscScalar* array_x;
2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2398 PetscInt sx;
2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2400
2401 PetscScalar* array_b;
2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2403 PetscInt sb;
2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2405
2406 lgraph_type& level_graph = *lgraph_p;
2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2408
2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2411 ref_x(array_x, boost::extents[num_vertices(graph)]);
2412
2413 typedef boost::iterator_property_map<array_ref_type::iterator,
2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417
2418 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2419
2420 PetscFunctionReturn(0);
2421 }
2422 #endif
2423
2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425 PetscInt nzlocal,nsends,nrecvs;
2426 PetscMPIInt *send_rank,*recv_rank;
2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428 PetscScalar *sbuf_a,**rbuf_a;
2429 PetscErrorCode (*Destroy)(Mat);
2430 } Mat_Redundant;
2431
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435 {
2436 PetscErrorCode ierr;
2437 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2438 PetscInt i;
2439
2440 PetscFunctionBegin;
2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2444 for (i=0; i<redund->nrecvs; i++){
2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2447 }
2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2449 ierr = PetscFree(redund);CHKERRQ(ierr);
2450 PetscFunctionReturn(0);
2451 }
2452
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatDestroy_MatRedundant"
2455 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456 {
2457 PetscErrorCode ierr;
2458 PetscContainer container;
2459 Mat_Redundant *redund=PETSC_NULL;
2460
2461 PetscFunctionBegin;
2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2465 A->ops->destroy = redund->Destroy;
2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2467 if (A->ops->destroy) {
2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2469 }
2470 PetscFunctionReturn(0);
2471 }
2472
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476 {
2477 PetscMPIInt rank,size;
2478 MPI_Comm comm=((PetscObject)mat)->comm;
2479 PetscErrorCode ierr;
2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482 PetscInt *rowrange=mat->rmap->range;
2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2484 Mat A=aij->A,B=aij->B,C=*matredundant;
2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486 PetscScalar *sbuf_a;
2487 PetscInt nzlocal=a->nz+b->nz;
2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491 MatScalar *aworkA,*aworkB;
2492 PetscScalar *vals;
2493 PetscMPIInt tag1,tag2,tag3,imdex;
2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496 MPI_Status recv_status,*send_status;
2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498 PetscInt **rbuf_j=PETSC_NULL;
2499 PetscScalar **rbuf_a=PETSC_NULL;
2500 Mat_Redundant *redund=PETSC_NULL;
2501 PetscContainer container;
2502
2503 PetscFunctionBegin;
2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506
2507 if (reuse == MAT_REUSE_MATRIX) {
2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for 
(l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 
/*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2729 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2730 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 } else { 2732 C = *matredundant; 2733 } 2734 2735 /* insert local matrix entries */ 2736 rptr = sbuf_j; 2737 cols = sbuf_j + rend-rstart + 1; 2738 vals = sbuf_a; 2739 for (i=0; i<rend-rstart; i++){ 2740 row = i + rstart; 2741 ncols = rptr[i+1] - rptr[i]; 2742 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2743 vals += ncols; 2744 cols += ncols; 2745 } 2746 /* insert received matrix entries */ 2747 for (imdex=0; imdex<nrecvs; imdex++){ 2748 rstart = rowrange[recv_rank[imdex]]; 2749 rend = rowrange[recv_rank[imdex]+1]; 2750 rptr = rbuf_j[imdex]; 2751 cols = rbuf_j[imdex] + rend-rstart + 1; 2752 vals = rbuf_a[imdex]; 2753 for (i=0; i<rend-rstart; i++){ 2754 row = i + rstart; 2755 ncols = rptr[i+1] - rptr[i]; 2756 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2757 vals += ncols; 2758 cols += ncols; 2759 } 2760 } 2761 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2764 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2765 if (reuse == MAT_INITIAL_MATRIX) { 2766 PetscContainer container; 2767 *matredundant = C; 2768 /* create a supporting struct and attach it to C for reuse */ 2769 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2770 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2771 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2772 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2773 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2774 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2775 2776 redund->nzlocal = nzlocal; 2777 redund->nsends = nsends; 2778 redund->nrecvs = nrecvs; 2779 
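/*
   The send/receive ranks and the sbuf/rbuf communication buffers are stashed in the
   Mat_Redundant structure attached to C (via the PetscContainer composed above), so that a
   later call with MAT_REUSE_MATRIX can skip the exchange of row pointers and column indices
   and only resend the numerical values.
*/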
redund->send_rank = send_rank;
2780 redund->recv_rank = recv_rank;
2781 redund->sbuf_nz = sbuf_nz;
2782 redund->rbuf_nz = rbuf_nz;
2783 redund->sbuf_j = sbuf_j;
2784 redund->sbuf_a = sbuf_a;
2785 redund->rbuf_j = rbuf_j;
2786 redund->rbuf_a = rbuf_a;
2787
2788 redund->Destroy = C->ops->destroy;
2789 C->ops->destroy = MatDestroy_MatRedundant;
2790 }
2791 PetscFunctionReturn(0);
2792 }
2793
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797 {
2798 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2799 PetscErrorCode ierr;
2800 PetscInt i,*idxb = 0;
2801 PetscScalar *va,*vb;
2802 Vec vtmp;
2803
2804 PetscFunctionBegin;
2805 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2806 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2807 if (idx) {
2808 for (i=0; i<A->rmap->n; i++) {
2809 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2810 }
2811 }
2812
2813 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2814 if (idx) {
2815 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2816 }
2817 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2818 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2819
2820 for (i=0; i<A->rmap->n; i++){
2821 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2822 va[i] = vb[i];
2823 if (idx) idx[i] = a->garray[idxb[i]];
2824 }
2825 }
2826
2827 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2828 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2829 ierr = PetscFree(idxb);CHKERRQ(ierr);
2830 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2831 PetscFunctionReturn(0);
2832 }
2833
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2837 {
2838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2839 PetscErrorCode ierr;
2840 PetscInt i,*idxb = 0;
2841 PetscScalar *va,*vb;
2842 Vec vtmp;
2843
2844 PetscFunctionBegin;
2845 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2846 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2847 if (idx) {
2848 for (i=0; i<A->rmap->n; i++) {
2849 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2850 }
2851 }
2852
2853 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2854 if (idx) {
2855 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2856 }
2857 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2858 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2859
2860 for (i=0; i<A->rmap->n; i++){
2861 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2862 va[i] = vb[i];
2863 if (idx) idx[i] = a->garray[idxb[i]];
2864 }
2865 }
2866
2867 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2868 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2869 ierr = PetscFree(idxb);CHKERRQ(ierr);
2870 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2871 PetscFunctionReturn(0);
2872 }
2873
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2877 {
2878 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2879 PetscInt n = A->rmap->n;
2880 PetscInt cstart = A->cmap->rstart;
2881 PetscInt *cmap = mat->garray;
2882 PetscInt *diagIdx, *offdiagIdx;
2883 Vec diagV, offdiagV;
2884 PetscScalar *a, *diagA, *offdiagA;
2885 PetscInt r;
2886 PetscErrorCode ierr;
2887
2888 PetscFunctionBegin;
2889 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2890 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2892 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2894 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2895 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2896 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2897 for(r = 0; r < n; ++r) { 2898 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2899 a[r] = diagA[r]; 2900 idx[r] = cstart + diagIdx[r]; 2901 } else { 2902 a[r] = offdiagA[r]; 2903 idx[r] = cmap[offdiagIdx[r]]; 2904 } 2905 } 2906 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2909 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2910 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2911 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2918 { 2919 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2920 PetscInt n = A->rmap->n; 2921 PetscInt cstart = A->cmap->rstart; 2922 PetscInt *cmap = mat->garray; 2923 PetscInt *diagIdx, *offdiagIdx; 2924 Vec diagV, offdiagV; 2925 PetscScalar *a, *diagA, *offdiagA; 2926 PetscInt r; 2927 PetscErrorCode ierr; 2928 2929 PetscFunctionBegin; 2930 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2933 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2935 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2936 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2937 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2938 for(r = 0; r < n; ++r) { 2939 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2940 a[r] = diagA[r]; 2941 idx[r] = cstart + diagIdx[r]; 2942 } else { 2943 a[r] = offdiagA[r]; 2944 idx[r] = cmap[offdiagIdx[r]]; 2945 } 2946 } 2947 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2951 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2952 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2959 { 2960 PetscErrorCode ierr; 2961 Mat *dummy; 2962 2963 PetscFunctionBegin; 2964 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2965 *newmat = *dummy; 2966 ierr = PetscFree(dummy);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2974 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2975 { 2976 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 2985 /* -------------------------------------------------------------------*/ 2986 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2987 MatGetRow_MPIAIJ, 2988 MatRestoreRow_MPIAIJ, 2989 MatMult_MPIAIJ, 2990 /* 4*/ MatMultAdd_MPIAIJ, 2991 MatMultTranspose_MPIAIJ, 2992 MatMultTransposeAdd_MPIAIJ, 2993 #ifdef PETSC_HAVE_PBGL 2994 MatSolve_MPIAIJ, 2995 #else 2996 0, 2997 #endif 2998 0, 2999 0, 3000 /*10*/ 0, 3001 0, 3002 0, 3003 MatSOR_MPIAIJ, 3004 MatTranspose_MPIAIJ, 3005 /*15*/ MatGetInfo_MPIAIJ, 3006 MatEqual_MPIAIJ, 3007 MatGetDiagonal_MPIAIJ, 3008 MatDiagonalScale_MPIAIJ, 3009 MatNorm_MPIAIJ, 3010 /*20*/ MatAssemblyBegin_MPIAIJ, 3011 MatAssemblyEnd_MPIAIJ, 3012 MatSetOption_MPIAIJ, 3013 MatZeroEntries_MPIAIJ, 3014 /*24*/ MatZeroRows_MPIAIJ, 3015 0, 3016 #ifdef PETSC_HAVE_PBGL 3017 0, 3018 #else 3019 0, 3020 #endif 3021 0, 3022 0, 3023 /*29*/ MatSetUpPreallocation_MPIAIJ, 3024 #ifdef PETSC_HAVE_PBGL 3025 0, 3026 #else 3027 0, 3028 #endif 3029 0, 3030 0, 3031 0, 3032 /*34*/ MatDuplicate_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*39*/ MatAXPY_MPIAIJ, 3038 MatGetSubMatrices_MPIAIJ, 3039 MatIncreaseOverlap_MPIAIJ, 3040 MatGetValues_MPIAIJ, 3041 MatCopy_MPIAIJ, 3042 /*44*/ MatGetRowMax_MPIAIJ, 3043 MatScale_MPIAIJ, 3044 0, 3045 0, 3046 MatZeroRowsColumns_MPIAIJ, 3047 /*49*/ MatSetBlockSize_MPIAIJ, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*54*/ MatFDColoringCreate_MPIAIJ, 3053 0, 3054 MatSetUnfactored_MPIAIJ, 3055 MatPermute_MPIAIJ, 3056 0, 3057 /*59*/ MatGetSubMatrix_MPIAIJ, 3058 MatDestroy_MPIAIJ, 3059 MatView_MPIAIJ, 3060 0, 3061 0, 3062 /*64*/ 0, 3063 0, 3064 0, 3065 0, 3066 0, 3067 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3068 MatGetRowMinAbs_MPIAIJ, 3069 0, 3070 MatSetColoring_MPIAIJ, 3071 #if defined(PETSC_HAVE_ADIC) 3072 MatSetValuesAdic_MPIAIJ, 3073 #else 3074 0, 3075 #endif 3076 MatSetValuesAdifor_MPIAIJ, 3077 /*75*/ MatFDColoringApply_AIJ, 3078 0, 3079 0, 3080 0, 3081 0, 3082 /*80*/ 0, 3083 0, 3084 0, 3085 /*83*/ MatLoad_MPIAIJ, 3086 0, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3092 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3093 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3094 MatPtAP_Basic, 3095 MatPtAPSymbolic_MPIAIJ, 3096 /*94*/ MatPtAPNumeric_MPIAIJ, 3097 0, 3098 0, 3099 0, 3100 0, 3101 /*99*/ 0, 3102 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3103 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3104 MatConjugate_MPIAIJ, 3105 0, 3106 /*104*/MatSetValuesRow_MPIAIJ, 3107 MatRealPart_MPIAIJ, 3108 MatImaginaryPart_MPIAIJ, 3109 0, 3110 0, 3111 /*109*/0, 3112 MatGetRedundantMatrix_MPIAIJ, 3113 MatGetRowMin_MPIAIJ, 3114 0, 3115 0, 3116 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 0, 3121 /*119*/0, 3122 0, 3123 0, 3124 0, 3125 MatGetMultiProcBlock_MPIAIJ, 3126 /*124*/MatFindNonZeroRows_MPIAIJ, 3127 MatGetColumnNorms_MPIAIJ, 3128 MatInvertBlockDiagonal_MPIAIJ, 3129 0, 3130 MatGetSubMatricesParallel_MPIAIJ, 3131 /*129*/0, 3132 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3133 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3134 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3135 0, 3136 /*134*/0, 3137 0, 3138 0, 3139 0, 3140 0 3141 }; 3142 3143 /* ----------------------------------------------------------------------------------------*/ 3144 3145 EXTERN_C_BEGIN 3146 #undef __FUNCT__ 3147 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3148 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3149 { 3150 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3151 PetscErrorCode ierr; 3152 3153 PetscFunctionBegin; 3154 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3155 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 EXTERN_C_END 3159 3160 EXTERN_C_BEGIN 3161 #undef __FUNCT__ 3162 
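/*
    A sketch of typical use of the MatStoreValues()/MatRetrieveValues() pair (illustrative,
    not taken from this file; C is any assembled AIJ matrix whose nonzero pattern has been
    frozen with MatSetOption(C,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE)):

      ierr = MatStoreValues(C);CHKERRQ(ierr);        save the current numerical values
      ....                                           e.g. shift or otherwise modify C
      ierr = MatRetrieveValues(C);CHKERRQ(ierr);     restore the saved values

    For MPIAIJ both calls simply forward to the diagonal (A) and off-diagonal (B) SeqAIJ blocks.
*/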
#define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3163 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3164 { 3165 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3166 PetscErrorCode ierr; 3167 3168 PetscFunctionBegin; 3169 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3170 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3171 PetscFunctionReturn(0); 3172 } 3173 EXTERN_C_END 3174 3175 EXTERN_C_BEGIN 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3178 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3179 { 3180 Mat_MPIAIJ *b; 3181 PetscErrorCode ierr; 3182 PetscInt i; 3183 3184 PetscFunctionBegin; 3185 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3186 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3187 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3188 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3189 3190 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3191 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3192 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3193 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3194 if (d_nnz) { 3195 for (i=0; i<B->rmap->n; i++) { 3196 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3197 } 3198 } 3199 if (o_nnz) { 3200 for (i=0; i<B->rmap->n; i++) { 3201 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3202 } 3203 } 3204 b = (Mat_MPIAIJ*)B->data; 3205 3206 if (!B->preallocated) { 3207 /* Explicitly create 2 MATSEQAIJ matrices. 
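       b->A is the "diagonal" block (local rows by local columns) and b->B is the
       "off-diagonal" block.  b->B is created with the full global column width here; during
       assembly its column space is compacted to just the off-process columns actually used,
       and garray records the resulting local-to-global column map (see
       MatSetUpMultiply_MPIAIJ()).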
*/ 3208 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3209 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3210 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3211 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3212 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3213 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3214 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3215 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3216 } 3217 3218 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3219 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3220 B->preallocated = PETSC_TRUE; 3221 PetscFunctionReturn(0); 3222 } 3223 EXTERN_C_END 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3227 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3228 { 3229 Mat mat; 3230 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 *newmat = 0; 3235 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3236 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3237 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3238 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3239 a = (Mat_MPIAIJ*)mat->data; 3240 3241 mat->factortype = matin->factortype; 3242 mat->rmap->bs = matin->rmap->bs; 3243 mat->assembled = PETSC_TRUE; 3244 mat->insertmode = NOT_SET_VALUES; 3245 mat->preallocated = PETSC_TRUE; 3246 3247 a->size = oldmat->size; 3248 a->rank = oldmat->rank; 3249 a->donotstash = oldmat->donotstash; 3250 a->roworiented = oldmat->roworiented; 3251 a->rowindices = 0; 3252 a->rowvalues = 0; 3253 a->getrowactive = PETSC_FALSE; 3254 3255 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3256 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3257 3258 if (oldmat->colmap) { 3259 #if defined (PETSC_USE_CTABLE) 3260 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3261 #else 3262 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3263 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3264 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3265 #endif 3266 } else a->colmap = 0; 3267 if (oldmat->garray) { 3268 PetscInt len; 3269 len = oldmat->B->cmap->n; 3270 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3271 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3272 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3273 } else a->garray = 0; 3274 3275 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3276 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3277 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3278 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3279 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3280 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3281 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3282 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3283 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3284 *newmat = mat; 3285 PetscFunctionReturn(0); 3286 } 3287 3288 3289 3290 #undef __FUNCT__ 3291 #define __FUNCT__ 
"MatLoad_MPIAIJ" 3292 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3293 { 3294 PetscScalar *vals,*svals; 3295 MPI_Comm comm = ((PetscObject)viewer)->comm; 3296 PetscErrorCode ierr; 3297 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3298 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3299 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3300 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3301 PetscInt cend,cstart,n,*rowners,sizesset=1; 3302 int fd; 3303 3304 PetscFunctionBegin; 3305 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3306 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3307 if (!rank) { 3308 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3309 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3310 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3311 } 3312 3313 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3314 3315 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3316 M = header[1]; N = header[2]; 3317 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3318 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3319 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3320 3321 /* If global sizes are set, check if they are consistent with that given in the file */ 3322 if (sizesset) { 3323 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3324 } 3325 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3326 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3327 3328 /* determine ownership of all rows */ 3329 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3330 else m = newMat->rmap->n; /* Set by user */ 3331 3332 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3333 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3334 3335 /* First process needs enough room for process with most rows */ 3336 if (!rank) { 3337 mmax = rowners[1]; 3338 for (i=2; i<size; i++) { 3339 mmax = PetscMax(mmax,rowners[i]); 3340 } 3341 } else mmax = m; 3342 3343 rowners[0] = 0; 3344 for (i=2; i<=size; i++) { 3345 rowners[i] += rowners[i-1]; 3346 } 3347 rstart = rowners[rank]; 3348 rend = rowners[rank+1]; 3349 3350 /* distribute row lengths to all processors */ 3351 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3352 if (!rank) { 3353 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3354 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3355 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3356 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3357 for (j=0; j<m; j++) { 3358 procsnz[0] += ourlens[j]; 3359 } 3360 for (i=1; i<size; i++) { 3361 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3362 /* calculate the number of nonzeros on each processor */ 3363 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3364 procsnz[i] += rowlengths[j]; 3365 } 3366 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3367 } 3368 
ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3369 } else { 3370 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3371 } 3372 3373 if (!rank) { 3374 /* determine max buffer needed and allocate it */ 3375 maxnz = 0; 3376 for (i=0; i<size; i++) { 3377 maxnz = PetscMax(maxnz,procsnz[i]); 3378 } 3379 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3380 3381 /* read in my part of the matrix column indices */ 3382 nz = procsnz[0]; 3383 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3384 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3385 3386 /* read in every one elses and ship off */ 3387 for (i=1; i<size; i++) { 3388 nz = procsnz[i]; 3389 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3390 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3391 } 3392 ierr = PetscFree(cols);CHKERRQ(ierr); 3393 } else { 3394 /* determine buffer space needed for message */ 3395 nz = 0; 3396 for (i=0; i<m; i++) { 3397 nz += ourlens[i]; 3398 } 3399 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3400 3401 /* receive message of column indices*/ 3402 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3403 } 3404 3405 /* determine column ownership if matrix is not square */ 3406 if (N != M) { 3407 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3408 else n = newMat->cmap->n; 3409 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3410 cstart = cend - n; 3411 } else { 3412 cstart = rstart; 3413 cend = rend; 3414 n = cend - cstart; 3415 } 3416 3417 /* loop over local rows, determining number of off diagonal entries */ 3418 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3419 jj = 0; 3420 for (i=0; i<m; i++) { 3421 for (j=0; j<ourlens[i]; j++) { 3422 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3423 jj++; 3424 } 3425 } 3426 3427 for (i=0; i<m; i++) { 3428 ourlens[i] -= offlens[i]; 3429 } 3430 if (!sizesset) { 3431 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3432 } 3433 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3434 3435 for (i=0; i<m; i++) { 3436 ourlens[i] += offlens[i]; 3437 } 3438 3439 if (!rank) { 3440 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3441 3442 /* read in my part of the matrix numerical values */ 3443 nz = procsnz[0]; 3444 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3445 3446 /* insert into matrix */ 3447 jj = rstart; 3448 smycols = mycols; 3449 svals = vals; 3450 for (i=0; i<m; i++) { 3451 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3452 smycols += ourlens[i]; 3453 svals += ourlens[i]; 3454 jj++; 3455 } 3456 3457 /* read in other processors and ship out */ 3458 for (i=1; i<size; i++) { 3459 nz = procsnz[i]; 3460 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3461 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3462 } 3463 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3464 } else { 3465 /* receive numeric values */ 3466 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3467 3468 /* receive message of values*/ 3469 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3470 3471 /* insert into matrix */ 3472 jj = rstart; 3473 smycols = mycols; 3474 svals = vals; 3475 for (i=0; i<m; i++) { 3476 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3477 
smycols += ourlens[i];
3478       svals   += ourlens[i];
3479       jj++;
3480     }
3481   }
3482   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3483   ierr = PetscFree(vals);CHKERRQ(ierr);
3484   ierr = PetscFree(mycols);CHKERRQ(ierr);
3485   ierr = PetscFree(rowners);CHKERRQ(ierr);
3486 
3487   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3494 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3495 {
3496   PetscErrorCode ierr;
3497   IS             iscol_local;
3498   PetscInt       csize;
3499 
3500   PetscFunctionBegin;
3501   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3502   if (call == MAT_REUSE_MATRIX) {
3503     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3504     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3505   } else {
3506     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3507   }
3508   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3509   if (call == MAT_INITIAL_MATRIX) {
3510     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3511     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3512   }
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3518 /*
3519     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
3520   process, and then the final parallel matrix built by concatenating those local matrices.
3521   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3522 
3523     Note: This requires a sequential iscol with all indices.
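
    A sketch of how the public interface above is typically driven (illustrative only; any
    row/column index sets on the matrix communicator work, ISCreateStride() is just one way
    to build them):

      ierr = ISCreateStride(((PetscObject)A)->comm,nloc,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ....                                 change values in A, keeping the same pattern
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

    On the MAT_INITIAL_MATRIX call the gathered column IS ("ISAllGather") and the sequential
    submatrix ("SubMatrix") are composed with S so that the MAT_REUSE_MATRIX call can reuse
    their nonzero structure.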
3524 */ 3525 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3526 { 3527 PetscErrorCode ierr; 3528 PetscMPIInt rank,size; 3529 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3530 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3531 Mat *local,M,Mreuse; 3532 MatScalar *vwork,*aa; 3533 MPI_Comm comm = ((PetscObject)mat)->comm; 3534 Mat_SeqAIJ *aij; 3535 3536 3537 PetscFunctionBegin; 3538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3539 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3540 3541 if (call == MAT_REUSE_MATRIX) { 3542 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3543 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3544 local = &Mreuse; 3545 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3546 } else { 3547 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3548 Mreuse = *local; 3549 ierr = PetscFree(local);CHKERRQ(ierr); 3550 } 3551 3552 /* 3553 m - number of local rows 3554 n - number of columns (same on all processors) 3555 rstart - first row in new global matrix generated 3556 */ 3557 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3558 if (call == MAT_INITIAL_MATRIX) { 3559 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3560 ii = aij->i; 3561 jj = aij->j; 3562 3563 /* 3564 Determine the number of non-zeros in the diagonal and off-diagonal 3565 portions of the matrix in order to do correct preallocation 3566 */ 3567 3568 /* first get start and end of "diagonal" columns */ 3569 if (csize == PETSC_DECIDE) { 3570 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3571 if (mglobal == n) { /* square matrix */ 3572 nlocal = m; 3573 } else { 3574 nlocal = n/size + ((n % size) > rank); 3575 } 3576 } else { 3577 nlocal = csize; 3578 } 3579 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3580 rstart = rend - nlocal; 3581 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3582 3583 /* next, compute all the lengths */ 3584 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3585 olens = dlens + m; 3586 for (i=0; i<m; i++) { 3587 jend = ii[i+1] - ii[i]; 3588 olen = 0; 3589 dlen = 0; 3590 for (j=0; j<jend; j++) { 3591 if (*jj < rstart || *jj >= rend) olen++; 3592 else dlen++; 3593 jj++; 3594 } 3595 olens[i] = olen; 3596 dlens[i] = dlen; 3597 } 3598 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3599 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3600 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3601 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3602 ierr = PetscFree(dlens);CHKERRQ(ierr); 3603 } else { 3604 PetscInt ml,nl; 3605 3606 M = *newmat; 3607 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3608 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3609 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3610 /* 3611 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3612 rather than the slower MatSetValues(). 
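       (Roughly: was_assembled = PETSC_TRUE lets MatSetValues_MPIAIJ() translate off-process
       column indices through the column map built by the first assembly, while
       assembled = PETSC_FALSE records that values are being reinserted; the
       MatAssemblyBegin()/MatAssemblyEnd() calls below complete the refill.)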
3613 */ 3614 M->was_assembled = PETSC_TRUE; 3615 M->assembled = PETSC_FALSE; 3616 } 3617 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3618 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3619 ii = aij->i; 3620 jj = aij->j; 3621 aa = aij->a; 3622 for (i=0; i<m; i++) { 3623 row = rstart + i; 3624 nz = ii[i+1] - ii[i]; 3625 cwork = jj; jj += nz; 3626 vwork = aa; aa += nz; 3627 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3628 } 3629 3630 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 *newmat = M; 3633 3634 /* save submatrix used in processor for next request */ 3635 if (call == MAT_INITIAL_MATRIX) { 3636 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3637 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3638 } 3639 3640 PetscFunctionReturn(0); 3641 } 3642 3643 EXTERN_C_BEGIN 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3646 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3647 { 3648 PetscInt m,cstart, cend,j,nnz,i,d; 3649 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3650 const PetscInt *JJ; 3651 PetscScalar *values; 3652 PetscErrorCode ierr; 3653 3654 PetscFunctionBegin; 3655 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3656 3657 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3658 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3659 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3660 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3661 m = B->rmap->n; 3662 cstart = B->cmap->rstart; 3663 cend = B->cmap->rend; 3664 rstart = B->rmap->rstart; 3665 3666 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3667 3668 #if defined(PETSC_USE_DEBUGGING) 3669 for (i=0; i<m; i++) { 3670 nnz = Ii[i+1]- Ii[i]; 3671 JJ = J + Ii[i]; 3672 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3673 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3674 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3675 } 3676 #endif 3677 3678 for (i=0; i<m; i++) { 3679 nnz = Ii[i+1]- Ii[i]; 3680 JJ = J + Ii[i]; 3681 nnz_max = PetscMax(nnz_max,nnz); 3682 d = 0; 3683 for (j=0; j<nnz; j++) { 3684 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3685 } 3686 d_nnz[i] = d; 3687 o_nnz[i] = nnz - d; 3688 } 3689 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3690 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3691 3692 if (v) values = (PetscScalar*)v; 3693 else { 3694 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3695 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3696 } 3697 3698 for (i=0; i<m; i++) { 3699 ii = i + rstart; 3700 nnz = Ii[i+1]- Ii[i]; 3701 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3702 } 3703 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3704 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3705 3706 if (!v) { 3707 ierr = PetscFree(values);CHKERRQ(ierr); 3708 } 3709 PetscFunctionReturn(0); 3710 } 3711 EXTERN_C_END 3712 3713 #undef __FUNCT__ 3714 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3715 /*@ 3716 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3717 (the default parallel PETSc format). 3718 3719 Collective on MPI_Comm 3720 3721 Input Parameters: 3722 + B - the matrix 3723 . i - the indices into j for the start of each local row (starts with zero) 3724 . j - the column indices for each local row (starts with zero) 3725 - v - optional values in the matrix 3726 3727 Level: developer 3728 3729 Notes: 3730 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3731 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3732 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3733 3734 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3735 3736 The format which is used for the sparse matrix input, is equivalent to a 3737 row-major ordering.. i.e for the following matrix, the input data expected is 3738 as shown: 3739 3740 1 0 0 3741 2 0 3 P0 3742 ------- 3743 4 5 6 P1 3744 3745 Process0 [P0]: rows_owned=[0,1] 3746 i = {0,1,3} [size = nrow+1 = 2+1] 3747 j = {0,0,2} [size = nz = 6] 3748 v = {1,2,3} [size = nz = 6] 3749 3750 Process1 [P1]: rows_owned=[2] 3751 i = {0,3} [size = nrow+1 = 1+1] 3752 j = {0,1,2} [size = nz = 6] 3753 v = {4,5,6} [size = nz = 6] 3754 3755 .keywords: matrix, aij, compressed row, sparse, parallel 3756 3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3758 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3759 @*/ 3760 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3761 { 3762 PetscErrorCode ierr; 3763 3764 PetscFunctionBegin; 3765 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3771 /*@C 3772 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3773 (the default parallel PETSc format). For good matrix assembly performance 3774 the user should preallocate the matrix storage by setting the parameters 3775 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3776 performance can be increased by more than a factor of 50. 3777 3778 Collective on MPI_Comm 3779 3780 Input Parameters: 3781 + A - the matrix 3782 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3783 (same value is used for all local rows) 3784 . d_nnz - array containing the number of nonzeros in the various rows of the 3785 DIAGONAL portion of the local submatrix (possibly different for each row) 3786 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3787 The size of this array is equal to the number of local rows, i.e 'm'. 3788 You must leave room for the diagonal entry even if it is zero. 3789 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3790 submatrix (same value is used for all local rows). 3791 - o_nnz - array containing the number of nonzeros in the various rows of the 3792 OFF-DIAGONAL portion of the local submatrix (possibly different for 3793 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3794 structure. The size of this array is equal to the number 3795 of local rows, i.e 'm'. 3796 3797 If the *_nnz parameter is given then the *_nz parameter is ignored 3798 3799 The AIJ format (also called the Yale sparse matrix format or 3800 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3801 storage. The stored row and column indices begin with zero. 3802 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3803 3804 The parallel matrix is partitioned such that the first m0 rows belong to 3805 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3806 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3807 3808 The DIAGONAL portion of the local submatrix of a processor can be defined 3809 as the submatrix which is obtained by extraction the part corresponding to 3810 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3811 first row that belongs to the processor, r2 is the last row belonging to 3812 the this processor, and c1-c2 is range of indices of the local part of a 3813 vector suitable for applying the matrix to. This is an mxn matrix. In the 3814 common case of a square matrix, the row and column ranges are the same and 3815 the DIAGONAL part is also square. The remaining portion of the local 3816 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3817 3818 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3819 3820 You can call MatGetInfo() to get information on how effective the preallocation was; 3821 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3822 You can also run with the option -info and look for messages with the string 3823 malloc in them to see if additional memory allocation was needed. 3824 3825 Example usage: 3826 3827 Consider the following 8x8 matrix with 34 non-zero values, that is 3828 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3829 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3830 as follows: 3831 3832 .vb 3833 1 2 0 | 0 3 0 | 0 4 3834 Proc0 0 5 6 | 7 0 0 | 8 0 3835 9 0 10 | 11 0 0 | 12 0 3836 ------------------------------------- 3837 13 0 14 | 15 16 17 | 0 0 3838 Proc1 0 18 0 | 19 20 21 | 0 0 3839 0 0 0 | 22 23 0 | 24 0 3840 ------------------------------------- 3841 Proc2 25 26 27 | 0 0 28 | 29 0 3842 30 0 0 | 31 32 33 | 0 34 3843 .ve 3844 3845 This can be represented as a collection of submatrices as: 3846 3847 .vb 3848 A B C 3849 D E F 3850 G H I 3851 .ve 3852 3853 Where the submatrices A,B,C are owned by proc0, D,E,F are 3854 owned by proc1, G,H,I are owned by proc2. 3855 3856 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3857 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3858 The 'M','N' parameters are 8,8, and have the same values on all procs. 3859 3860 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3861 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3862 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3863 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3864 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3865 matrix, ans [DF] as another SeqAIJ matrix. 3866 3867 When d_nz, o_nz parameters are specified, d_nz storage elements are 3868 allocated for every row of the local diagonal submatrix, and o_nz 3869 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3870 One way to choose d_nz and o_nz is to use the max nonzerors per local 3871 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3872 In this case, the values of d_nz,o_nz are: 3873 .vb 3874 proc0 : dnz = 2, o_nz = 2 3875 proc1 : dnz = 3, o_nz = 2 3876 proc2 : dnz = 1, o_nz = 4 3877 .ve 3878 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3879 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3880 for proc3. i.e we are using 12+15+10=37 storage locations to store 3881 34 values. 3882 3883 When d_nnz, o_nnz parameters are specified, the storage is specified 3884 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3885 In the above case the values for d_nnz,o_nnz are: 3886 .vb 3887 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3888 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3889 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3890 .ve 3891 Here the space allocated is sum of all the above values i.e 34, and 3892 hence pre-allocation is perfect. 3893 3894 Level: intermediate 3895 3896 .keywords: matrix, aij, compressed row, sparse, parallel 3897 3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3899 MPIAIJ, MatGetInfo() 3900 @*/ 3901 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3902 { 3903 PetscErrorCode ierr; 3904 3905 PetscFunctionBegin; 3906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3907 PetscValidType(B,1); 3908 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 3912 #undef __FUNCT__ 3913 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3914 /*@ 3915 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3916 CSR format the local rows. 3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + comm - MPI communicator 3922 . m - number of local rows (Cannot be PETSC_DECIDE) 3923 . n - This value should be the same as the local size used in creating the 3924 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3925 calculated if N is given) For square matrices n is almost always m. 3926 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3927 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3928 . i - row indices 3929 . j - column indices 3930 - a - matrix values 3931 3932 Output Parameter: 3933 . mat - the matrix 3934 3935 Level: intermediate 3936 3937 Notes: 3938 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3939 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3940 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3941 3942 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
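
   For example, with each process passing its own local CSR triple (a sketch; M and N could
   equally be given explicitly instead of being determined from m and n):
.vb
   ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);CHKERRQ(ierr);
.ve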
3943 3944 The format which is used for the sparse matrix input, is equivalent to a 3945 row-major ordering.. i.e for the following matrix, the input data expected is 3946 as shown: 3947 3948 1 0 0 3949 2 0 3 P0 3950 ------- 3951 4 5 6 P1 3952 3953 Process0 [P0]: rows_owned=[0,1] 3954 i = {0,1,3} [size = nrow+1 = 2+1] 3955 j = {0,0,2} [size = nz = 6] 3956 v = {1,2,3} [size = nz = 6] 3957 3958 Process1 [P1]: rows_owned=[2] 3959 i = {0,3} [size = nrow+1 = 1+1] 3960 j = {0,1,2} [size = nz = 6] 3961 v = {4,5,6} [size = nz = 6] 3962 3963 .keywords: matrix, aij, compressed row, sparse, parallel 3964 3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3966 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3967 @*/ 3968 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3969 { 3970 PetscErrorCode ierr; 3971 3972 PetscFunctionBegin; 3973 if (i[0]) { 3974 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3975 } 3976 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3977 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3978 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3979 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3980 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3981 PetscFunctionReturn(0); 3982 } 3983 3984 #undef __FUNCT__ 3985 #define __FUNCT__ "MatCreateMPIAIJ" 3986 /*@C 3987 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3988 (the default parallel PETSc format). For good matrix assembly performance 3989 the user should preallocate the matrix storage by setting the parameters 3990 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3991 performance can be increased by more than a factor of 50. 3992 3993 Collective on MPI_Comm 3994 3995 Input Parameters: 3996 + comm - MPI communicator 3997 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3998 This value should be the same as the local size used in creating the 3999 y vector for the matrix-vector product y = Ax. 4000 . n - This value should be the same as the local size used in creating the 4001 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4002 calculated if N is given) For square matrices n is almost always m. 4003 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4004 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4005 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4006 (same value is used for all local rows) 4007 . d_nnz - array containing the number of nonzeros in the various rows of the 4008 DIAGONAL portion of the local submatrix (possibly different for each row) 4009 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4010 The size of this array is equal to the number of local rows, i.e 'm'. 4011 You must leave room for the diagonal entry even if it is zero. 4012 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4013 submatrix (same value is used for all local rows). 
4014 - o_nnz - array containing the number of nonzeros in the various rows of the 4015 OFF-DIAGONAL portion of the local submatrix (possibly different for 4016 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4017 structure. The size of this array is equal to the number 4018 of local rows, i.e 'm'. 4019 4020 Output Parameter: 4021 . A - the matrix 4022 4023 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4024 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4025 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4026 4027 Notes: 4028 If the *_nnz parameter is given then the *_nz parameter is ignored 4029 4030 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4031 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4032 storage requirements for this matrix. 4033 4034 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4035 processor than it must be used on all processors that share the object for 4036 that argument. 4037 4038 The user MUST specify either the local or global matrix dimensions 4039 (possibly both). 4040 4041 The parallel matrix is partitioned across processors such that the 4042 first m0 rows belong to process 0, the next m1 rows belong to 4043 process 1, the next m2 rows belong to process 2 etc.. where 4044 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4045 values corresponding to [m x N] submatrix. 4046 4047 The columns are logically partitioned with the n0 columns belonging 4048 to 0th partition, the next n1 columns belonging to the next 4049 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4050 4051 The DIAGONAL portion of the local submatrix on any given processor 4052 is the submatrix corresponding to the rows and columns m,n 4053 corresponding to the given processor. i.e diagonal matrix on 4054 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4055 etc. The remaining portion of the local submatrix [m x (N-n)] 4056 constitute the OFF-DIAGONAL portion. The example below better 4057 illustrates this concept. 4058 4059 For a square global matrix we define each processor's diagonal portion 4060 to be its local rows and the corresponding columns (a square submatrix); 4061 each processor's off-diagonal portion encompasses the remainder of the 4062 local matrix (a rectangular submatrix). 4063 4064 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4065 4066 When calling this routine with a single process communicator, a matrix of 4067 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4068 type of communicator, use the construction mechanism: 4069 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4070 4071 By default, this format uses inodes (identical nodes) when possible. 4072 We search for consecutive rows with the same nonzero structure, thereby 4073 reusing matrix information to achieve increased efficiency. 4074 4075 Options Database Keys: 4076 + -mat_no_inode - Do not use inodes 4077 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4078 - -mat_aij_oneindex - Internally use indexing starting at 1 4079 rather than 0. Note that when calling MatSetValues(), 4080 the user still MUST index entries starting at 0! 4081 4082 4083 Example usage: 4084 4085 Consider the following 8x8 matrix with 34 non-zero values, that is 4086 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4087 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4088 as follows: 4089 4090 .vb 4091 1 2 0 | 0 3 0 | 0 4 4092 Proc0 0 5 6 | 7 0 0 | 8 0 4093 9 0 10 | 11 0 0 | 12 0 4094 ------------------------------------- 4095 13 0 14 | 15 16 17 | 0 0 4096 Proc1 0 18 0 | 19 20 21 | 0 0 4097 0 0 0 | 22 23 0 | 24 0 4098 ------------------------------------- 4099 Proc2 25 26 27 | 0 0 28 | 29 0 4100 30 0 0 | 31 32 33 | 0 34 4101 .ve 4102 4103 This can be represented as a collection of submatrices as: 4104 4105 .vb 4106 A B C 4107 D E F 4108 G H I 4109 .ve 4110 4111 Where the submatrices A,B,C are owned by proc0, D,E,F are 4112 owned by proc1, G,H,I are owned by proc2. 4113 4114 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4115 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4116 The 'M','N' parameters are 8,8, and have the same values on all procs. 4117 4118 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4119 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4120 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4121 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4122 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4123 matrix, ans [DF] as another SeqAIJ matrix. 4124 4125 When d_nz, o_nz parameters are specified, d_nz storage elements are 4126 allocated for every row of the local diagonal submatrix, and o_nz 4127 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4128 One way to choose d_nz and o_nz is to use the max nonzerors per local 4129 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4130 In this case, the values of d_nz,o_nz are: 4131 .vb 4132 proc0 : dnz = 2, o_nz = 2 4133 proc1 : dnz = 3, o_nz = 2 4134 proc2 : dnz = 1, o_nz = 4 4135 .ve 4136 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4137 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4138 for proc3. i.e we are using 12+15+10=37 storage locations to store 4139 34 values. 4140 4141 When d_nnz, o_nnz parameters are specified, the storage is specified 4142 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4143 In the above case the values for d_nnz,o_nnz are: 4144 .vb 4145 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4146 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4147 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4148 .ve 4149 Here the space allocated is sum of all the above values i.e 34, and 4150 hence pre-allocation is perfect. 
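
   A sketch of the corresponding call (communicator shown as PETSC_COMM_WORLD for
   concreteness; m,n are 3,3 on proc0 and proc1 and 2,2 on proc2, and d_nnz,o_nnz hold the
   per-row counts listed above):
.vb
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve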
4151 4152 Level: intermediate 4153 4154 .keywords: matrix, aij, compressed row, sparse, parallel 4155 4156 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4157 MPIAIJ, MatCreateMPIAIJWithArrays() 4158 @*/ 4159 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4160 { 4161 PetscErrorCode ierr; 4162 PetscMPIInt size; 4163 4164 PetscFunctionBegin; 4165 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4166 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4167 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4168 if (size > 1) { 4169 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4170 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4171 } else { 4172 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4173 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4174 } 4175 PetscFunctionReturn(0); 4176 } 4177 4178 #undef __FUNCT__ 4179 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4180 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4181 { 4182 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4183 4184 PetscFunctionBegin; 4185 *Ad = a->A; 4186 *Ao = a->B; 4187 *colmap = a->garray; 4188 PetscFunctionReturn(0); 4189 } 4190 4191 #undef __FUNCT__ 4192 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4193 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4194 { 4195 PetscErrorCode ierr; 4196 PetscInt i; 4197 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4198 4199 PetscFunctionBegin; 4200 if (coloring->ctype == IS_COLORING_GLOBAL) { 4201 ISColoringValue *allcolors,*colors; 4202 ISColoring ocoloring; 4203 4204 /* set coloring for diagonal portion */ 4205 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4206 4207 /* set coloring for off-diagonal portion */ 4208 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4209 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4210 for (i=0; i<a->B->cmap->n; i++) { 4211 colors[i] = allcolors[a->garray[i]]; 4212 } 4213 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4214 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4215 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4216 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4217 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4218 ISColoringValue *colors; 4219 PetscInt *larray; 4220 ISColoring ocoloring; 4221 4222 /* set coloring for diagonal portion */ 4223 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4224 for (i=0; i<a->A->cmap->n; i++) { 4225 larray[i] = i + A->cmap->rstart; 4226 } 4227 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4228 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4229 for (i=0; i<a->A->cmap->n; i++) { 4230 colors[i] = coloring->colors[larray[i]]; 4231 } 4232 ierr = PetscFree(larray);CHKERRQ(ierr); 4233 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4234 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4235 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4236 4237 /* set coloring for off-diagonal portion */ 4238 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4239 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4240 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4241 for (i=0; i<a->B->cmap->n; i++) { 4242 colors[i] = coloring->colors[larray[i]]; 4243 } 4244 ierr = PetscFree(larray);CHKERRQ(ierr); 4245 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4246 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4247 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4248 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4249 4250 PetscFunctionReturn(0); 4251 } 4252 4253 #if defined(PETSC_HAVE_ADIC) 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4256 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4257 { 4258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4259 PetscErrorCode ierr; 4260 4261 PetscFunctionBegin; 4262 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4263 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4264 PetscFunctionReturn(0); 4265 } 4266 #endif 4267 4268 #undef __FUNCT__ 4269 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4270 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4271 { 4272 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4273 PetscErrorCode ierr; 4274 4275 PetscFunctionBegin; 4276 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4277 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4278 PetscFunctionReturn(0); 4279 } 4280 4281 #undef __FUNCT__ 4282 #define __FUNCT__ "MatMergeSymbolic" 4283 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4284 { 4285 PetscErrorCode ierr; 4286 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4287 PetscInt *indx; 4288 4289 PetscFunctionBegin; 4290 /* This routine will ONLY return MPIAIJ type matrix */ 4291 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4292 if (n == PETSC_DECIDE){ 4293 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4294 } 4295 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4296 rstart -= m; 4297 4298 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4299 for (i=0;i<m;i++) { 4300 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4301 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4302 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4303 } 4304 4305 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4306 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4307 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4308 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4309 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 #undef __FUNCT__ 4314 #define __FUNCT__ "MatMergeNumeric" 4315 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4316 { 4317 PetscErrorCode ierr; 4318 PetscInt m,N,i,rstart,nnz,Ii; 4319 PetscInt *indx; 4320 PetscScalar *values; 4321 4322 PetscFunctionBegin; 4323 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4324 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4325 for (i=0;i<m;i++) { 4326 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4327 Ii = i + rstart; 4328 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4329 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4330 } 4331 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4332 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatMerge" 4339 /*@ 4340 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4341 matrices from each processor 4342 4343 Collective on MPI_Comm 4344 4345 Input Parameters: 4346 + comm - the communicators the parallel matrix will live on 4347 . inmat - the input sequential matrices 4348 . n - number of local columns (or PETSC_DECIDE) 4349 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4350 4351 Output Parameter: 4352 . outmat - the parallel matrix generated 4353 4354 Level: advanced 4355 4356 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4357 4358 @*/ 4359 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4360 { 4361 PetscErrorCode ierr; 4362 4363 PetscFunctionBegin; 4364 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4365 if (scall == MAT_INITIAL_MATRIX){ 4366 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4367 } 4368 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4369 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatFileSplit" 4375 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4376 { 4377 PetscErrorCode ierr; 4378 PetscMPIInt rank; 4379 PetscInt m,N,i,rstart,nnz; 4380 size_t len; 4381 const PetscInt *indx; 4382 PetscViewer out; 4383 char *name; 4384 Mat B; 4385 const PetscScalar *values; 4386 4387 PetscFunctionBegin; 4388 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4389 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4390 /* Should this be the type of the diagonal block of A? 
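      (MatFileSplit() copies this process's local rows of A into a sequential m x N MATSEQAIJ
      matrix B and appends it to the binary file "<outfile>.<rank>", one file per process.)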
*/ 4391 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4392 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4393 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4394 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4395 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4396 for (i=0;i<m;i++) { 4397 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4398 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4399 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4400 } 4401 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4402 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4403 4404 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4405 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4406 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4407 sprintf(name,"%s.%d",outfile,rank); 4408 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4409 ierr = PetscFree(name); 4410 ierr = MatView(B,out);CHKERRQ(ierr); 4411 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4412 ierr = MatDestroy(&B);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4419 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4420 { 4421 PetscErrorCode ierr; 4422 Mat_Merge_SeqsToMPI *merge; 4423 PetscContainer container; 4424 4425 PetscFunctionBegin; 4426 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4427 if (container) { 4428 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4433 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4434 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4435 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4436 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4441 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4442 ierr = PetscFree(merge);CHKERRQ(ierr); 4443 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4444 } 4445 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4446 PetscFunctionReturn(0); 4447 } 4448 4449 #include <../src/mat/utils/freespace.h> 4450 #include <petscbt.h> 4451 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4454 /*@C 4455 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4456 matrices from each processor 4457 4458 Collective on MPI_Comm 4459 4460 Input Parameters: 4461 + comm - the communicators the parallel matrix will live on 4462 . seqmat - the input sequential matrices 4463 . m - number of local rows (or PETSC_DECIDE) 4464 . n - number of local columns (or PETSC_DECIDE) 4465 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4466 4467 Output Parameter: 4468 . mpimat - the parallel matrix generated 4469 4470 Level: advanced 4471 4472 Notes: 4473 The dimensions of the sequential matrix in each processor MUST be the same. 4474 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4475 destroyed when mpimat is destroyed. 
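
   The merge data is attached to mpimat as a PetscContainer composed under the name
   "MatMergeSeqsToMPI"; a sketch of retrieving it:
.vb
   PetscContainer container;
   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
.ve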
Call PetscObjectQuery() to access seqmat. 4476 @*/ 4477 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4478 { 4479 PetscErrorCode ierr; 4480 MPI_Comm comm=((PetscObject)mpimat)->comm; 4481 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4482 PetscMPIInt size,rank,taga,*len_s; 4483 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4484 PetscInt proc,m; 4485 PetscInt **buf_ri,**buf_rj; 4486 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4487 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4488 MPI_Request *s_waits,*r_waits; 4489 MPI_Status *status; 4490 MatScalar *aa=a->a; 4491 MatScalar **abuf_r,*ba_i; 4492 Mat_Merge_SeqsToMPI *merge; 4493 PetscContainer container; 4494 4495 PetscFunctionBegin; 4496 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4497 4498 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4499 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4500 4501 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4502 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4503 4504 bi = merge->bi; 4505 bj = merge->bj; 4506 buf_ri = merge->buf_ri; 4507 buf_rj = merge->buf_rj; 4508 4509 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4510 owners = merge->rowmap->range; 4511 len_s = merge->len_s; 4512 4513 /* send and recv matrix values */ 4514 /*-----------------------------*/ 4515 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4516 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4517 4518 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4519 for (proc=0,k=0; proc<size; proc++){ 4520 if (!len_s[proc]) continue; 4521 i = owners[proc]; 4522 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4523 k++; 4524 } 4525 4526 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4527 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4528 ierr = PetscFree(status);CHKERRQ(ierr); 4529 4530 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4531 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4532 4533 /* insert mat values of mpimat */ 4534 /*----------------------------*/ 4535 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4536 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4537 4538 for (k=0; k<merge->nrecv; k++){ 4539 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4540 nrows = *(buf_ri_k[k]); 4541 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4542 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4543 } 4544 4545 /* set values of ba */ 4546 m = merge->rowmap->n; 4547 for (i=0; i<m; i++) { 4548 arow = owners[rank] + i; 4549 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4550 bnzi = bi[i+1] - bi[i]; 4551 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4552 4553 /* add local non-zero vals of this proc's seqmat into ba */ 4554 anzi = ai[arow+1] - ai[arow]; 4555 aj = a->j + ai[arow]; 4556 aa = a->a + ai[arow]; 4557 nextaj = 0; 4558 for (j=0; nextaj<anzi; j++){ 4559 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4560 ba_i[j] += aa[nextaj++]; 4561 } 4562 } 4563 4564 /* add received vals into ba */ 4565 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4566 /* i-th row */ 4567 if (i == *nextrow[k]) { 4568 anzi = *(nextai[k]+1) - *nextai[k]; 4569 aj = buf_rj[k] + *(nextai[k]); 4570 aa = abuf_r[k] + *(nextai[k]); 4571 nextaj = 0; 4572 for (j=0; nextaj<anzi; j++){ 4573 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4574 ba_i[j] += aa[nextaj++]; 4575 } 4576 } 4577 nextrow[k]++; nextai[k]++; 4578 } 4579 } 4580 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4581 } 4582 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4583 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4584 4585 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4586 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4587 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4588 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4589 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4590 PetscFunctionReturn(0); 4591 } 4592 4593 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4594 4595 #undef __FUNCT__ 4596 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4597 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4598 { 4599 PetscErrorCode ierr; 4600 Mat B_mpi; 4601 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4602 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4603 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4604 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4605 PetscInt len,proc,*dnz,*onz; 4606 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4607 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4608 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4609 MPI_Status *status; 4610 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4611 PetscBT lnkbt; 4612 Mat_Merge_SeqsToMPI *merge; 4613 PetscContainer container; 4614 4615 PetscFunctionBegin; 4616 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4617 4618 /* make sure it is a PETSc comm */ 4619 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4622 4623 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4624 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4625 4626 /* determine row ownership */ 4627 /*---------------------------------------------------------*/ 4628 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4629 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4630 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4631 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4632 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4633 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4634 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4635 4636 m = merge->rowmap->n; 4637 M = merge->rowmap->N; 4638 owners = merge->rowmap->range; 4639 4640 /* determine the number of messages to send, their lengths */ 4641 /*---------------------------------------------------------*/ 4642 len_s = merge->len_s; 4643 4644 len = 0; /* length of buf_si[] */ 4645 merge->nsend = 0; 4646 for (proc=0; proc<size; proc++){ 4647 len_si[proc] = 0; 4648 if (proc == rank){ 4649 len_s[proc] = 0; 4650 } else { 4651 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4652 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4653 } 4654 if (len_s[proc]) { 4655 
merge->nsend++; 4656 nrows = 0; 4657 for (i=owners[proc]; i<owners[proc+1]; i++){ 4658 if (ai[i+1] > ai[i]) nrows++; 4659 } 4660 len_si[proc] = 2*(nrows+1); 4661 len += len_si[proc]; 4662 } 4663 } 4664 4665 /* determine the number and length of messages to receive for ij-structure */ 4666 /*-------------------------------------------------------------------------*/ 4667 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4668 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4669 4670 /* post the Irecv of j-structure */ 4671 /*-------------------------------*/ 4672 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4673 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4674 4675 /* post the Isend of j-structure */ 4676 /*--------------------------------*/ 4677 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4678 4679 for (proc=0, k=0; proc<size; proc++){ 4680 if (!len_s[proc]) continue; 4681 i = owners[proc]; 4682 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4683 k++; 4684 } 4685 4686 /* receives and sends of j-structure are complete */ 4687 /*------------------------------------------------*/ 4688 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4689 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4690 4691 /* send and recv i-structure */ 4692 /*---------------------------*/ 4693 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4694 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4695 4696 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4697 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4698 for (proc=0,k=0; proc<size; proc++){ 4699 if (!len_s[proc]) continue; 4700 /* form outgoing message for i-structure: 4701 buf_si[0]: nrows to be sent 4702 [1:nrows]: row index (global) 4703 [nrows+1:2*nrows+1]: i-structure index 4704 */ 4705 /*-------------------------------------------*/ 4706 nrows = len_si[proc]/2 - 1; 4707 buf_si_i = buf_si + nrows+1; 4708 buf_si[0] = nrows; 4709 buf_si_i[0] = 0; 4710 nrows = 0; 4711 for (i=owners[proc]; i<owners[proc+1]; i++){ 4712 anzi = ai[i+1] - ai[i]; 4713 if (anzi) { 4714 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4715 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4716 nrows++; 4717 } 4718 } 4719 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4720 k++; 4721 buf_si += len_si[proc]; 4722 } 4723 4724 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4725 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4726 4727 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4728 for (i=0; i<merge->nrecv; i++){ 4729 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4730 } 4731 4732 ierr = PetscFree(len_si);CHKERRQ(ierr); 4733 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4734 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4735 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4736 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4737 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4738 ierr = 
PetscFree(status);CHKERRQ(ierr); 4739 4740 /* compute a local seq matrix in each processor */ 4741 /*----------------------------------------------*/ 4742 /* allocate bi array and free space for accumulating nonzero column info */ 4743 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4744 bi[0] = 0; 4745 4746 /* create and initialize a linked list */ 4747 nlnk = N+1; 4748 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4749 4750 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4751 len = 0; 4752 len = ai[owners[rank+1]] - ai[owners[rank]]; 4753 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4754 current_space = free_space; 4755 4756 /* determine symbolic info for each local row */ 4757 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4758 4759 for (k=0; k<merge->nrecv; k++){ 4760 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4761 nrows = *buf_ri_k[k]; 4762 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4763 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4764 } 4765 4766 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4767 len = 0; 4768 for (i=0;i<m;i++) { 4769 bnzi = 0; 4770 /* add local non-zero cols of this proc's seqmat into lnk */ 4771 arow = owners[rank] + i; 4772 anzi = ai[arow+1] - ai[arow]; 4773 aj = a->j + ai[arow]; 4774 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4775 bnzi += nlnk; 4776 /* add received col data into lnk */ 4777 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4778 if (i == *nextrow[k]) { /* i-th row */ 4779 anzi = *(nextai[k]+1) - *nextai[k]; 4780 aj = buf_rj[k] + *nextai[k]; 4781 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4782 bnzi += nlnk; 4783 nextrow[k]++; nextai[k]++; 4784 } 4785 } 4786 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4787 4788 /* if free space is not available, make more free space */ 4789 if (current_space->local_remaining<bnzi) { 4790 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4791 nspacedouble++; 4792 } 4793 /* copy data into free space, then initialize lnk */ 4794 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4795 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4796 4797 current_space->array += bnzi; 4798 current_space->local_used += bnzi; 4799 current_space->local_remaining -= bnzi; 4800 4801 bi[i+1] = bi[i] + bnzi; 4802 } 4803 4804 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4805 4806 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4807 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4808 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4809 4810 /* create symbolic parallel matrix B_mpi */ 4811 /*---------------------------------------*/ 4812 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4813 if (n==PETSC_DECIDE) { 4814 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4815 } else { 4816 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4817 } 4818 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4819 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4820 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4821 4822 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4823
B_mpi->assembled = PETSC_FALSE; 4824 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4825 merge->bi = bi; 4826 merge->bj = bj; 4827 merge->buf_ri = buf_ri; 4828 merge->buf_rj = buf_rj; 4829 merge->coi = PETSC_NULL; 4830 merge->coj = PETSC_NULL; 4831 merge->owners_co = PETSC_NULL; 4832 4833 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4834 4835 /* attach the supporting struct to B_mpi for reuse */ 4836 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4837 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4838 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4839 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4840 *mpimat = B_mpi; 4841 4842 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4843 PetscFunctionReturn(0); 4844 } 4845 4846 #undef __FUNCT__ 4847 #define __FUNCT__ "MatMerge_SeqsToMPI" 4848 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4849 { 4850 PetscErrorCode ierr; 4851 4852 PetscFunctionBegin; 4853 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4854 if (scall == MAT_INITIAL_MATRIX){ 4855 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4856 } 4857 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4858 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4859 PetscFunctionReturn(0); 4860 } 4861 4862 #undef __FUNCT__ 4863 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4864 /*@ 4865 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4866 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4867 with MatGetSize() 4868 4869 Not Collective 4870 4871 Input Parameters: 4872 + A - the matrix 4873 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4874 4875 Output Parameter: 4876 .
A_loc - the local sequential matrix generated 4877 4878 Level: developer 4879 4880 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4881 4882 @*/ 4883 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4884 { 4885 PetscErrorCode ierr; 4886 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4887 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4888 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4889 MatScalar *aa=a->a,*ba=b->a,*cam; 4890 PetscScalar *ca; 4891 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4892 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4893 PetscBool match; 4894 4895 PetscFunctionBegin; 4896 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4897 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4898 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4899 if (scall == MAT_INITIAL_MATRIX){ 4900 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4901 ci[0] = 0; 4902 for (i=0; i<am; i++){ 4903 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4904 } 4905 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4906 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4907 k = 0; 4908 for (i=0; i<am; i++) { 4909 ncols_o = bi[i+1] - bi[i]; 4910 ncols_d = ai[i+1] - ai[i]; 4911 /* off-diagonal portion of A */ 4912 for (jo=0; jo<ncols_o; jo++) { 4913 col = cmap[*bj]; 4914 if (col >= cstart) break; 4915 cj[k] = col; bj++; 4916 ca[k++] = *ba++; 4917 } 4918 /* diagonal portion of A */ 4919 for (j=0; j<ncols_d; j++) { 4920 cj[k] = cstart + *aj++; 4921 ca[k++] = *aa++; 4922 } 4923 /* off-diagonal portion of A */ 4924 for (j=jo; j<ncols_o; j++) { 4925 cj[k] = cmap[*bj++]; 4926 ca[k++] = *ba++; 4927 } 4928 } 4929 /* put together the new matrix */ 4930 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4931 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4932 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4933 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4934 mat->free_a = PETSC_TRUE; 4935 mat->free_ij = PETSC_TRUE; 4936 mat->nonew = 0; 4937 } else if (scall == MAT_REUSE_MATRIX){ 4938 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4939 ci = mat->i; cj = mat->j; cam = mat->a; 4940 for (i=0; i<am; i++) { 4941 /* off-diagonal portion of A */ 4942 ncols_o = bi[i+1] - bi[i]; 4943 for (jo=0; jo<ncols_o; jo++) { 4944 col = cmap[*bj]; 4945 if (col >= cstart) break; 4946 *cam++ = *ba++; bj++; 4947 } 4948 /* diagonal portion of A */ 4949 ncols_d = ai[i+1] - ai[i]; 4950 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4951 /* off-diagonal portion of A */ 4952 for (j=jo; j<ncols_o; j++) { 4953 *cam++ = *ba++; bj++; 4954 } 4955 } 4956 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4957 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4963 /*@C 4964 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4965 4966 Not Collective 4967 4968 Input Parameters: 4969 + A - the matrix 4970 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4972 4973 Output Parameter: 4974 . 
A_loc - the local sequential matrix generated 4975 4976 Level: developer 4977 4978 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4979 4980 @*/ 4981 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4982 { 4983 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4984 PetscErrorCode ierr; 4985 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4986 IS isrowa,iscola; 4987 Mat *aloc; 4988 PetscBool match; 4989 4990 PetscFunctionBegin; 4991 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4992 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4993 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4994 if (!row){ 4995 start = A->rmap->rstart; end = A->rmap->rend; 4996 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4997 } else { 4998 isrowa = *row; 4999 } 5000 if (!col){ 5001 start = A->cmap->rstart; 5002 cmap = a->garray; 5003 nzA = a->A->cmap->n; 5004 nzB = a->B->cmap->n; 5005 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5006 ncols = 0; 5007 for (i=0; i<nzB; i++) { 5008 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5009 else break; 5010 } 5011 imark = i; 5012 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5013 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5014 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5015 } else { 5016 iscola = *col; 5017 } 5018 if (scall != MAT_INITIAL_MATRIX){ 5019 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5020 aloc[0] = *A_loc; 5021 } 5022 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5023 *A_loc = aloc[0]; 5024 ierr = PetscFree(aloc);CHKERRQ(ierr); 5025 if (!row){ 5026 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5027 } 5028 if (!col){ 5029 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5030 } 5031 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatGetBrowsOfAcols" 5037 /*@C 5038 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5039 5040 Collective on Mat 5041 5042 Input Parameters: 5043 + A,B - the matrices in mpiaij format 5044 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5045 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5046 5047 Output Parameters: 5048 + rowb, colb - index sets of rows and columns of B to extract 5049 - B_seq - the sequential matrix generated 5050 5051 Level: developer 5052 5053 @*/ 5054 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5055 { 5056 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5057 PetscErrorCode ierr; 5058 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5059 IS isrowb,iscolb; 5060 Mat *bseq=PETSC_NULL; 5061 5062 PetscFunctionBegin; 5063 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5064 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5065 } 5066 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5067 5068 if (scall == MAT_INITIAL_MATRIX){ 5069 start = A->cmap->rstart; 5070 cmap = a->garray; 5071 nzA = a->A->cmap->n; 5072 nzB = a->B->cmap->n; 5073 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5074 ncols = 0; 5075 for (i=0; i<nzB; i++) { /* row < local row index */ 5076 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5077 else break; 5078 } 5079 imark = i; 5080 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5081 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5082 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5083 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5084 } else { 5085 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5086 isrowb = *rowb; iscolb = *colb; 5087 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5088 bseq[0] = *B_seq; 5089 } 5090 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5091 *B_seq = bseq[0]; 5092 ierr = PetscFree(bseq);CHKERRQ(ierr); 5093 if (!rowb){ 5094 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5095 } else { 5096 *rowb = isrowb; 5097 } 5098 if (!colb){ 5099 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5100 } else { 5101 *colb = iscolb; 5102 } 5103 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5104 PetscFunctionReturn(0); 5105 } 5106 5107 #undef __FUNCT__ 5108 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5109 /* 5110 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5111 of the OFF-DIAGONAL portion of local A 5112 5113 Collective on Mat 5114 5115 Input Parameters: 5116 + A,B - the matrices in mpiaij format 5117 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5118 5119 Output Parameters: 5120 + startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5121 . startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5122 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5123 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5124 5125 Level: developer 5126 5127 */ 5128 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5129 { 5130 VecScatter_MPI_General *gen_to,*gen_from; 5131 PetscErrorCode ierr; 5132 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5133 Mat_SeqAIJ *b_oth; 5134 VecScatter ctx=a->Mvctx; 5135 MPI_Comm comm=((PetscObject)ctx)->comm; 5136 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5137 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5138 PetscScalar *rvalues,*svalues; 5139 MatScalar *b_otha,*bufa,*bufA; 5140 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5141 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5142 MPI_Status *sstatus,rstatus; 5143 PetscMPIInt jj; 5144 PetscInt *cols,sbs,rbs; 5145 PetscScalar *vals; 5146 5147 PetscFunctionBegin; 5148 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5149 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5150 } 5151 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5152 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5153 5154 gen_to = (VecScatter_MPI_General*)ctx->todata; 5155 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5156 rvalues = gen_from->values; /* holds the length of receiving row */ 5157 svalues = gen_to->values; /* holds the length of sending row */ 5158 nrecvs = gen_from->n; 5159 nsends = gen_to->n; 5160 5161 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5162 srow = gen_to->indices; /* local row index to be sent */ 5163 sstarts = gen_to->starts; 5164 sprocs = gen_to->procs; 5165 sstatus = gen_to->sstatus; 5166 sbs = gen_to->bs; 5167 rstarts = gen_from->starts; 5168 rprocs = gen_from->procs; 5169 rbs = gen_from->bs; 5170 5171 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5172 if (scall == MAT_INITIAL_MATRIX){ 5173 /* i-array */ 5174 /*---------*/ 5175 /* post receives */ 5176 for (i=0; i<nrecvs; i++){ 5177 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5178 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5179 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5180 } 5181 5182 /* pack the outgoing message */ 5183 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5184 sstartsj[0] = 0; rstartsj[0] = 0; 5185 len = 0; /* total length of j or a array to be sent */ 5186 k = 0; 5187 for (i=0; i<nsends; i++){ 5188 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5189 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5190 for (j=0; j<nrows; j++) { 5191 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5192 for (l=0; l<sbs; l++){ 5193 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5194 rowlen[j*sbs+l] = ncols; 5195 len += ncols; 5196 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5197 } 5198 k++; 5199 } 5200 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5201 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5202 } 5203 /* recvs 
and sends of i-array are completed */ 5204 i = nrecvs; 5205 while (i--) { 5206 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5207 } 5208 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5209 5210 /* allocate buffers for sending j and a arrays */ 5211 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5212 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5213 5214 /* create i-array of B_oth */ 5215 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5216 b_othi[0] = 0; 5217 len = 0; /* total length of j or a array to be received */ 5218 k = 0; 5219 for (i=0; i<nrecvs; i++){ 5220 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5221 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5222 for (j=0; j<nrows; j++) { 5223 b_othi[k+1] = b_othi[k] + rowlen[j]; 5224 len += rowlen[j]; k++; 5225 } 5226 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5227 } 5228 5229 /* allocate space for j and a arrays of B_oth */ 5230 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5231 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5232 5233 /* j-array */ 5234 /*---------*/ 5235 /* post receives of j-array */ 5236 for (i=0; i<nrecvs; i++){ 5237 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5238 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5239 } 5240 5241 /* pack the outgoing message j-array */ 5242 k = 0; 5243 for (i=0; i<nsends; i++){ 5244 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5245 bufJ = bufj+sstartsj[i]; 5246 for (j=0; j<nrows; j++) { 5247 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5248 for (ll=0; ll<sbs; ll++){ 5249 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5250 for (l=0; l<ncols; l++){ 5251 *bufJ++ = cols[l]; 5252 } 5253 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5254 } 5255 } 5256 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5257 } 5258 5259 /* recvs and sends of j-array are completed */ 5260 i = nrecvs; 5261 while (i--) { 5262 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5263 } 5264 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5265 } else if (scall == MAT_REUSE_MATRIX){ 5266 sstartsj = *startsj; 5267 rstartsj = *startsj_r; 5268 bufa = *bufa_ptr; 5269 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5270 b_otha = b_oth->a; 5271 } else { 5272 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5273 } 5274 5275 /* a-array */ 5276 /*---------*/ 5277 /* post receives of a-array */ 5278 for (i=0; i<nrecvs; i++){ 5279 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5280 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5281 } 5282 5283 /* pack the outgoing message a-array */ 5284 k = 0; 5285 for (i=0; i<nsends; i++){ 5286 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5287 bufA = bufa+sstartsj[i]; 5288 for (j=0; j<nrows; j++) { 5289 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5290 for (ll=0; ll<sbs; ll++){ 5291 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5292 for (l=0; l<ncols; l++){ 5293 *bufA++ = vals[l]; 5294 } 5295 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5296 } 5297 } 5298 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5299 } 5300 /* recvs and sends of a-array are completed */ 5301 i = nrecvs; 5302 while (i--) { 5303 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5304 } 5305 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5306 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5307 5308 if (scall == MAT_INITIAL_MATRIX){ 5309 /* put together the new matrix */ 5310 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5311 5312 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5313 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5314 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5315 b_oth->free_a = PETSC_TRUE; 5316 b_oth->free_ij = PETSC_TRUE; 5317 b_oth->nonew = 0; 5318 5319 ierr = PetscFree(bufj);CHKERRQ(ierr); 5320 if (!startsj || !bufa_ptr){ 5321 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5322 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5323 } else { 5324 *startsj = sstartsj; 5325 *startsj_r = rstartsj; 5326 *bufa_ptr = bufa; 5327 } 5328 } 5329 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5330 PetscFunctionReturn(0); 5331 } 5332 5333 #undef __FUNCT__ 5334 #define __FUNCT__ "MatGetCommunicationStructs" 5335 /*@C 5336 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5337 5338 Not Collective 5339 5340 Input Parameters: 5341 . A - The matrix in mpiaij format 5342 5343 Output Parameter: 5344 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5345 . 
colmap - A map from global column index to local index into lvec 5346 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5347 5348 Level: developer 5349 5350 @*/ 5351 #if defined (PETSC_USE_CTABLE) 5352 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5353 #else 5354 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5355 #endif 5356 { 5357 Mat_MPIAIJ *a; 5358 5359 PetscFunctionBegin; 5360 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5361 PetscValidPointer(lvec, 2); 5362 PetscValidPointer(colmap, 3); 5363 PetscValidPointer(multScatter, 4); 5364 a = (Mat_MPIAIJ *) A->data; 5365 if (lvec) *lvec = a->lvec; 5366 if (colmap) *colmap = a->colmap; 5367 if (multScatter) *multScatter = a->Mvctx; 5368 PetscFunctionReturn(0); 5369 } 5370 5371 EXTERN_C_BEGIN 5372 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5373 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5374 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5375 EXTERN_C_END 5376 5377 #undef __FUNCT__ 5378 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5379 /* 5380 Computes (B'*A')' since computing B*A directly is untenable 5381 5382 n p p 5383 ( ) ( ) ( ) 5384 m ( A ) * n ( B ) = m ( C ) 5385 ( ) ( ) ( ) 5386 5387 */ 5388 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5389 { 5390 PetscErrorCode ierr; 5391 Mat At,Bt,Ct; 5392 5393 PetscFunctionBegin; 5394 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5395 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5396 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5397 ierr = MatDestroy(&At);CHKERRQ(ierr); 5398 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5399 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5400 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5401 PetscFunctionReturn(0); 5402 } 5403 5404 #undef __FUNCT__ 5405 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5406 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5407 { 5408 PetscErrorCode ierr; 5409 PetscInt m=A->rmap->n,n=B->cmap->n; 5410 Mat Cmat; 5411 5412 PetscFunctionBegin; 5413 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5414 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5415 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5416 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5417 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5418 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5419 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5420 *C = Cmat; 5421 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5422 PetscFunctionReturn(0); 5423 } 5424 5425 /* ----------------------------------------------------------------*/ 5426 #undef __FUNCT__ 5427 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5428 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5429 { 5430 PetscErrorCode ierr; 5431 5432 PetscFunctionBegin; 5433 if (scall == MAT_INITIAL_MATRIX){ 5434 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5435 } 5436 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5437 PetscFunctionReturn(0); 5438 } 5439 5440 EXTERN_C_BEGIN 5441 #if 
defined(PETSC_HAVE_MUMPS) 5442 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5443 #endif 5444 #if defined(PETSC_HAVE_PASTIX) 5445 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5446 #endif 5447 #if defined(PETSC_HAVE_SUPERLU_DIST) 5448 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5449 #endif 5450 #if defined(PETSC_HAVE_SPOOLES) 5451 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5452 #endif 5453 EXTERN_C_END 5454 5455 /*MC 5456 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5457 5458 Options Database Keys: 5459 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5460 5461 Level: beginner 5462 5463 .seealso: MatCreateMPIAIJ() 5464 M*/ 5465 5466 EXTERN_C_BEGIN 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatCreate_MPIAIJ" 5469 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5470 { 5471 Mat_MPIAIJ *b; 5472 PetscErrorCode ierr; 5473 PetscMPIInt size; 5474 5475 PetscFunctionBegin; 5476 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5477 5478 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5479 B->data = (void*)b; 5480 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5481 B->rmap->bs = 1; 5482 B->assembled = PETSC_FALSE; 5483 5484 B->insertmode = NOT_SET_VALUES; 5485 b->size = size; 5486 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5487 5488 /* build cache for off array entries formed */ 5489 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5490 b->donotstash = PETSC_FALSE; 5491 b->colmap = 0; 5492 b->garray = 0; 5493 b->roworiented = PETSC_TRUE; 5494 5495 /* stuff used for matrix vector multiply */ 5496 b->lvec = PETSC_NULL; 5497 b->Mvctx = PETSC_NULL; 5498 5499 /* stuff for MatGetRow() */ 5500 b->rowindices = 0; 5501 b->rowvalues = 0; 5502 b->getrowactive = PETSC_FALSE; 5503 5504 #if defined(PETSC_HAVE_SPOOLES) 5505 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5506 "MatGetFactor_mpiaij_spooles", 5507 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5508 #endif 5509 #if defined(PETSC_HAVE_MUMPS) 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5511 "MatGetFactor_aij_mumps", 5512 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5513 #endif 5514 #if defined(PETSC_HAVE_PASTIX) 5515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5516 "MatGetFactor_mpiaij_pastix", 5517 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5518 #endif 5519 #if defined(PETSC_HAVE_SUPERLU_DIST) 5520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5521 "MatGetFactor_mpiaij_superlu_dist", 5522 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5523 #endif 5524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5525 "MatStoreValues_MPIAIJ", 5526 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5528 "MatRetrieveValues_MPIAIJ", 5529 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5531 "MatGetDiagonalBlock_MPIAIJ", 5532 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5534 "MatIsTranspose_MPIAIJ", 5535 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5536 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5537 "MatMPIAIJSetPreallocation_MPIAIJ", 5538 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5540 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5541 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5543 "MatDiagonalScaleLocal_MPIAIJ", 5544 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5546 "MatConvert_MPIAIJ_MPIAIJPERM", 5547 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5549 "MatConvert_MPIAIJ_MPIAIJCRL", 5550 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5552 "MatConvert_MPIAIJ_MPISBAIJ", 5553 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5555 "MatMatMult_MPIDense_MPIAIJ", 5556 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5558 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5559 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5561 "MatMatMultNumeric_MPIDense_MPIAIJ", 5562 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5563 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5564 PetscFunctionReturn(0); 5565 } 5566 EXTERN_C_END 5567 5568 #undef __FUNCT__ 5569 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5570 /*@ 5571 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5572 and "off-diagonal" part of the matrix in CSR format. 5573 5574 Collective on MPI_Comm 5575 5576 Input Parameters: 5577 + comm - MPI communicator 5578 . m - number of local rows (Cannot be PETSC_DECIDE) 5579 . n - This value should be the same as the local size used in creating the 5580 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5581 calculated if N is given) For square matrices n is almost always m. 5582 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5583 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5584 . i - row indices for "diagonal" portion of matrix 5585 . j - column indices 5586 . a - matrix values 5587 . oi - row indices for "off-diagonal" portion of matrix 5588 . oj - column indices 5589 - oa - matrix values 5590 5591 Output Parameter: 5592 . mat - the matrix 5593 5594 Level: advanced 5595 5596 Notes: 5597 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5598 must free the arrays once the matrix has been destroyed and not before. 5599 5600 The i and j indices are 0 based 5601 5602 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5603 5604 This sets local rows and cannot be used to set off-processor values. 5605 5606 You cannot later use MatSetValues() to change values in this matrix. 
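   A minimal usage sketch (error handling abbreviated; comm, the local sizes m and n, and the six CSR arrays are
   assumed to have been set up by the caller, and the arrays must stay allocated until the matrix is destroyed, as
   noted above):

      Mat         A;
      PetscInt    *i,*j,*oi,*oj;   /* row offsets and column indices of the "diagonal" and "off-diagonal" parts */
      PetscScalar *a,*oa;          /* the corresponding numerical values */
      /* ... build the two local CSR structures ... */
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);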
5607 5608 .keywords: matrix, aij, compressed row, sparse, parallel 5609 5610 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5611 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5612 @*/ 5613 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5614 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5615 { 5616 PetscErrorCode ierr; 5617 Mat_MPIAIJ *maij; 5618 5619 PetscFunctionBegin; 5620 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5621 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5622 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5623 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5624 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5625 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5626 maij = (Mat_MPIAIJ*) (*mat)->data; 5627 maij->donotstash = PETSC_TRUE; 5628 (*mat)->preallocated = PETSC_TRUE; 5629 5630 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5631 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5632 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5633 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5634 5635 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5636 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5637 5638 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5639 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5640 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5641 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5642 5643 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5644 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5645 PetscFunctionReturn(0); 5646 } 5647 5648 /* 5649 Special version for direct calls from Fortran 5650 */ 5651 #include <private/fortranimpl.h> 5652 5653 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5654 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5655 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5656 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5657 #endif 5658 5659 /* Change these macros so can be used in void function */ 5660 #undef CHKERRQ 5661 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5662 #undef SETERRQ2 5663 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5664 #undef SETERRQ 5665 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5666 5667 EXTERN_C_BEGIN 5668 #undef __FUNCT__ 5669 #define __FUNCT__ "matsetvaluesmpiaij_" 5670 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5671 { 5672 Mat mat = *mmat; 5673 PetscInt m = *mm, n = *mn; 5674 InsertMode addv = *maddv; 5675 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5676 PetscScalar value; 5677 PetscErrorCode ierr; 5678 5679 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5680 if (mat->insertmode == NOT_SET_VALUES) { 5681 mat->insertmode = addv; 5682 } 5683 #if defined(PETSC_USE_DEBUG) 5684 else if (mat->insertmode != addv) { 5685 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5686 } 5687 #endif 5688 { 5689 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5690 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5691 PetscBool roworiented = aij->roworiented; 5692 5693 /* Some Variables required in the macro */ 5694 Mat A = aij->A; 5695 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5696 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5697 MatScalar *aa = a->a; 5698 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5699 Mat B = aij->B; 5700 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5701 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5702 MatScalar *ba = b->a; 5703 5704 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5705 PetscInt nonew = a->nonew; 5706 MatScalar *ap1,*ap2; 5707 5708 PetscFunctionBegin; 5709 for (i=0; i<m; i++) { 5710 if (im[i] < 0) continue; 5711 #if defined(PETSC_USE_DEBUG) 5712 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5713 #endif 5714 if (im[i] >= rstart && im[i] < rend) { 5715 row = im[i] - rstart; 5716 lastcol1 = -1; 5717 rp1 = aj + ai[row]; 5718 ap1 = aa + ai[row]; 5719 rmax1 = aimax[row]; 5720 nrow1 = ailen[row]; 5721 low1 = 0; 5722 high1 = nrow1; 5723 lastcol2 = -1; 5724 rp2 = bj + bi[row]; 5725 ap2 = ba + bi[row]; 5726 rmax2 = bimax[row]; 5727 nrow2 = bilen[row]; 5728 low2 = 0; 5729 high2 = nrow2; 5730 5731 for (j=0; j<n; j++) { 5732 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5733 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5734 if (in[j] >= cstart && in[j] < cend){ 5735 col = in[j] - cstart; 5736 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5737 } else if (in[j] < 0) continue; 5738 #if defined(PETSC_USE_DEBUG) 5739 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5740 #endif 5741 else { 5742 if (mat->was_assembled) { 5743 if (!aij->colmap) { 5744 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5745 } 5746 #if defined (PETSC_USE_CTABLE) 5747 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5748 col--; 5749 #else 5750 col = aij->colmap[in[j]] - 1; 5751 #endif 5752 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5753 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5754 col = in[j]; 5755 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5756 B = aij->B; 5757 b = (Mat_SeqAIJ*)B->data; 5758 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5759 rp2 = bj + bi[row]; 5760 ap2 = ba + bi[row]; 5761 rmax2 = bimax[row]; 5762 nrow2 = bilen[row]; 5763 low2 = 0; 5764 high2 = nrow2; 5765 bm = aij->B->rmap->n; 5766 ba = b->a; 5767 } 5768 } else col = in[j]; 5769 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5770 } 5771 } 5772 } else { 5773 if (!aij->donotstash) { 5774 if (roworiented) { 5775 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5776 } else { 5777 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5778 } 5779 } 5780 } 5781 }} 5782 PetscFunctionReturnVoid(); 5783 } 5784 EXTERN_C_END 5785 5786