1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = 
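/* garray[] translates a local column index of the off-diagonal block B into the global column number; it is used below to accumulate each entry of B into the correct slot of work[] */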
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
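    A sketch of a typical MAT_INITIAL_MATRIX call (the names gmat, m and dmat here are only illustrative):
    gmat is a MATSEQAIJ matrix that is used only on process 0 and m is the number of rows this process
    is to own in the distributed result,

       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

    With MAT_REUSE_MATRIX only the numerical values are moved from process 0 into the previously
    created *inmat.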
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
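    For example, MatSetValues_MPIAIJ() below converts a global column number gcol that falls in the
    off-diagonal part into a local column lcol of B roughly as (a sketch mirroring the lookup code;
    gcol and lcol are illustrative names):

  #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
  #else
       lcol = aij->colmap[gcol] - 1;
  #endif

    the +1/-1 shift being needed because a stored value of 0 means "column not present on this process".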
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
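  /*
     MatSetValuesRow_MPIAIJ() below assumes v[] holds the entire local row with its entries ordered
     by global column, i.e. a layout of the form (sketch)

        v = [ B entries left of the diagonal block | A (diagonal block) entries | B entries right of the diagonal block ]

     which is why three straight PetscMemcpy() calls into b->a and a->a suffice.
  */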
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
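/* the bitwise OR of the two modes can only arise from the MPI_BOR reduction above when some ranks used INSERT_VALUES and others used ADD_VALUES */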
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselves, in order that we may reassemble.
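       (the MPI_Allreduce() with MPI_PROD below is nonzero only if every rank still has
        was_assembled set, so a zero result on a rank that is itself assembled means some
        other rank has disassembled and this rank must disassemble as well)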
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
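      When a nonzero diag is requested but the diagonal block is not square, the diagonal
      entries are instead inserted one at a time with MatSetValues() below, which is why that
      path refuses to run when new nonzero locations are disallowed.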
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
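/* compressed row storage of B: ii[] spans only the rows that contain nonzeros and ridx[] gives their true row numbers */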
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
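/*
   The multiply kernels above share one communication/computation overlap pattern, sketched here
   for MatMult_MPIAIJ() (this is a description of the code above, not additional code):

      VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);    start gathering off-process entries of xx
      (*a->A->ops->mult)(a->A,xx,yy);                                        local diagonal block while messages are in flight
      VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);      finish the gather into lvec
      (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);                             yy += B times lvec

   The transpose variants apply B^T into lvec first and then combine the SCATTER_REVERSE with the
   local A^T product.
*/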
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
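/* raw file descriptor of the binary viewer, obtained below with PetscViewerBinaryGetDescriptor() */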
fd; 1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1295 PetscScalar *column_values; 1296 PetscInt message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
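         This gathers a copy of the whole matrix onto process 0, so it is meant for modest sizes
         only; ASCII output of matrices with more than 1024 rows is refused below unless
         -mat_ascii_output_large is given.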
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1521 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1522 1523 /* copy over the A part */ 1524 Aloc = (Mat_SeqAIJ*)aij->A->data; 1525 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1526 row = mat->rmap->rstart; 1527 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1528 for (i=0; i<m; i++) { 1529 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1530 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1531 } 1532 aj = Aloc->j; 1533 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1534 1535 /* copy over the B part */ 1536 Aloc = (Mat_SeqAIJ*)aij->B->data; 1537 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1540 ct = cols; 1541 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1542 for (i=0; i<m; i++) { 1543 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1544 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1545 } 1546 ierr = PetscFree(ct);CHKERRQ(ierr); 1547 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1549 /* 1550 Everyone has to call to draw the matrix since the graphics waits are 1551 synchronized across all processors that share the PetscDraw object 1552 */ 1553 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1554 if (!rank) { 1555 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1556 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1557 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1558 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1559 } 1560 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(&A);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatView_MPIAIJ" 1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1569 { 1570 PetscErrorCode ierr; 1571 PetscBool iascii,isdraw,issocket,isbinary; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1578 if (iascii || isdraw || isbinary || issocket) { 1579 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1580 } else { 1581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSOR_MPIAIJ" 1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1589 { 1590 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1591 PetscErrorCode ierr; 1592 Vec bb1 = 0; 1593 PetscBool hasop; 1594 1595 PetscFunctionBegin; 1596 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1597 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1598 } 1599 1600 if (flag == SOR_APPLY_UPPER) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1606 if (flag & SOR_ZERO_INITIAL_GUESS) { 1607 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1608 its--; 1609 } 1610 1611 while (its--) { 1612 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* update rhs: bb1 = bb - B*x */ 1616 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1617 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1618 1619 /* local sweep */ 1620 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1621 } 1622 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_EISENSTAT) { 1655 Vec xx1; 1656 1657 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1658 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1659 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 if (!mat->diag) { 1663 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1664 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1665 } 1666 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1667 if (hasop) { 1668 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1669 } else { 1670 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1671 } 1672 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1673 1674 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1675 1676 /* local sweep */ 1677 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1678 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1679 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1680 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1681 1682 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatPermute_MPIAIJ" 1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1689 { 1690 MPI_Comm comm; 1691 PetscInt first,local_rowsize,local_colsize; 1692 const PetscInt *rows; 1693 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1698 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1699 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1700 /* collect the global row permutation and invert it */ 1701 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1702 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1703 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1704 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1705 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1706 /* get the local target indices */ 1707 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1708 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1709 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1710 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1711 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1712 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1713 /* the column permutation is so much easier; 1714 make a local version of 'colp' and invert it */ 1715 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1716 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1717 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1718 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1719 /* now we just get the submatrix */ 1720 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1721 /* clean up */ 1722 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1730 { 1731 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1732 Mat A = mat->A,B = 
mat->B; 1733 PetscErrorCode ierr; 1734 PetscReal isend[5],irecv[5]; 1735 1736 PetscFunctionBegin; 1737 info->block_size = 1.0; 1738 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1740 isend[3] = info->memory; isend[4] = info->mallocs; 1741 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1742 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1743 isend[3] += info->memory; isend[4] += info->mallocs; 1744 if (flag == MAT_LOCAL) { 1745 info->nz_used = isend[0]; 1746 info->nz_allocated = isend[1]; 1747 info->nz_unneeded = isend[2]; 1748 info->memory = isend[3]; 1749 info->mallocs = isend[4]; 1750 } else if (flag == MAT_GLOBAL_MAX) { 1751 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1752 info->nz_used = irecv[0]; 1753 info->nz_allocated = irecv[1]; 1754 info->nz_unneeded = irecv[2]; 1755 info->memory = irecv[3]; 1756 info->mallocs = irecv[4]; 1757 } else if (flag == MAT_GLOBAL_SUM) { 1758 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1759 info->nz_used = irecv[0]; 1760 info->nz_allocated = irecv[1]; 1761 info->nz_unneeded = irecv[2]; 1762 info->memory = irecv[3]; 1763 info->mallocs = irecv[4]; 1764 } 1765 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1766 info->fill_ratio_needed = 0; 1767 info->factor_mallocs = 0; 1768 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatSetOption_MPIAIJ" 1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1775 { 1776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 switch (op) { 1781 case MAT_NEW_NONZERO_LOCATIONS: 1782 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1783 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1784 case MAT_KEEP_NONZERO_PATTERN: 1785 case MAT_NEW_NONZERO_LOCATION_ERR: 1786 case MAT_USE_INODES: 1787 case MAT_IGNORE_ZERO_ENTRIES: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_ROW_ORIENTED: 1792 a->roworiented = flg; 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_NEW_DIAGONALS: 1797 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1798 break; 1799 case MAT_IGNORE_OFF_PROC_ENTRIES: 1800 a->donotstash = flg; 1801 break; 1802 case MAT_SPD: 1803 A->spd_set = PETSC_TRUE; 1804 A->spd = flg; 1805 if (flg) { 1806 A->symmetric = PETSC_TRUE; 1807 A->structurally_symmetric = PETSC_TRUE; 1808 A->symmetric_set = PETSC_TRUE; 1809 A->structurally_symmetric_set = PETSC_TRUE; 1810 } 1811 break; 1812 case MAT_SYMMETRIC: 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_STRUCTURALLY_SYMMETRIC: 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 break; 1818 case MAT_HERMITIAN: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_SYMMETRY_ETERNAL: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 default: 1825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatGetRow_MPIAIJ" 1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1833 { 1834 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1835 
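   /* The work arrays and pointers declared below are used to merge one local row:
      the row is fetched separately from the diagonal block (mat->A) and the
      off-diagonal block (mat->B), then spliced back together in globally
      increasing column order, with garray translating B's compressed column
      indices to global column numbers. */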
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1836 PetscErrorCode ierr; 1837 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1838 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1839 PetscInt *cmap,*idx_p; 1840 1841 PetscFunctionBegin; 1842 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1843 mat->getrowactive = PETSC_TRUE; 1844 1845 if (!mat->rowvalues && (idx || v)) { 1846 /* 1847 allocate enough space to hold information from the longest row. 1848 */ 1849 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1850 PetscInt max = 1,tmp; 1851 for (i=0; i<matin->rmap->n; i++) { 1852 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1853 if (max < tmp) { max = tmp; } 1854 } 1855 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1856 } 1857 1858 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1859 lrow = row - rstart; 1860 1861 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1862 if (!v) {pvA = 0; pvB = 0;} 1863 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1864 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1865 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1866 nztot = nzA + nzB; 1867 1868 cmap = mat->garray; 1869 if (v || idx) { 1870 if (nztot) { 1871 /* Sort by increasing column numbers, assuming A and B already sorted */ 1872 PetscInt imark = -1; 1873 if (v) { 1874 *v = v_p = mat->rowvalues; 1875 for (i=0; i<nzB; i++) { 1876 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1877 else break; 1878 } 1879 imark = i; 1880 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1881 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1882 } 1883 if (idx) { 1884 *idx = idx_p = mat->rowindices; 1885 if (imark > -1) { 1886 for (i=0; i<imark; i++) { 1887 idx_p[i] = cmap[cworkB[i]]; 1888 } 1889 } else { 1890 for (i=0; i<nzB; i++) { 1891 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1892 else break; 1893 } 1894 imark = i; 1895 } 1896 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1897 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1898 } 1899 } else { 1900 if (idx) *idx = 0; 1901 if (v) *v = 0; 1902 } 1903 } 1904 *nz = nztot; 1905 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1906 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1913 { 1914 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1915 1916 PetscFunctionBegin; 1917 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1918 aij->getrowactive = PETSC_FALSE; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatNorm_MPIAIJ" 1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1928 PetscErrorCode ierr; 1929 PetscInt i,j,cstart = mat->cmap->rstart; 1930 PetscReal sum = 0.0; 1931 MatScalar *v; 1932 1933 PetscFunctionBegin; 1934 if (aij->size == 1) { 1935 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1936 } else { 1937 if (type == 
NORM_FROBENIUS) { 1938 v = amat->a; 1939 for (i=0; i<amat->nz; i++) { 1940 #if defined(PETSC_USE_COMPLEX) 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 #else 1943 sum += (*v)*(*v); v++; 1944 #endif 1945 } 1946 v = bmat->a; 1947 for (i=0; i<bmat->nz; i++) { 1948 #if defined(PETSC_USE_COMPLEX) 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 #else 1951 sum += (*v)*(*v); v++; 1952 #endif 1953 } 1954 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1961 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1962 *norm = 0.0; 1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 } else { 1993 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatTranspose_MPIAIJ" 2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2004 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2005 PetscErrorCode ierr; 2006 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2007 PetscInt cstart=A->cmap->rstart,ncol; 2008 Mat B; 2009 MatScalar *array; 2010 2011 PetscFunctionBegin; 2012 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2013 2014 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2015 ai = Aloc->i; aj = Aloc->j; 2016 bi = Bloc->i; bj = Bloc->j; 2017 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2018 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2019 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2020 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (i=0; i<ai[ma]; i++){ 2022 d_nnz[aj[i]] ++; 2023 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2024 } 2025 2026 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2027 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2028 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2029 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2030 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2031 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2032 } else { 2033 B = *matout; 2034 } 2035 2036 /* copy over the A part */ 2037 array = Aloc->a; 2038 row = A->rmap->rstart; 2039 for (i=0; i<ma; i++) { 2040 ncol = ai[i+1]-ai[i]; 2041 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2042 row++; array += ncol; aj += ncol; 2043 } 2044 aj = Aloc->j; 2045 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2046 2047 /* copy over the B part */ 2048 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2049 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2050 array = Bloc->a; 2051 row = A->rmap->rstart; 2052 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2053 cols_tmp = cols; 2054 for (i=0; i<mb; i++) { 2055 ncol = bi[i+1]-bi[i]; 2056 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2057 row++; array += ncol; cols_tmp += ncol; 2058 } 2059 ierr = PetscFree(cols);CHKERRQ(ierr); 2060 2061 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2064 *matout = B; 2065 } else { 2066 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2073 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2074 { 2075 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2076 Mat a = aij->A,b = aij->B; 2077 PetscErrorCode ierr; 2078 PetscInt s1,s2,s3; 2079 2080 PetscFunctionBegin; 2081 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2082 if (rr) { 2083 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2084 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2085 /* Overlap communication with computation. 
*/ 2086 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2087 } 2088 if (ll) { 2089 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2090 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2091 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2092 } 2093 /* scale the diagonal block */ 2094 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2095 2096 if (rr) { 2097 /* Do a scatter end and then right scale the off-diagonal block */ 2098 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2100 } 2101 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2107 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2108 { 2109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2114 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2116 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2122 { 2123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatEqual_MPIAIJ" 2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2134 { 2135 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2136 Mat a,b,c,d; 2137 PetscBool flg; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 a = matA->A; b = matA->B; 2142 c = matB->A; d = matB->B; 2143 2144 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2145 if (flg) { 2146 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2147 } 2148 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatCopy_MPIAIJ" 2154 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2155 { 2156 PetscErrorCode ierr; 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2158 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2159 2160 PetscFunctionBegin; 2161 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2162 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2163 /* because of the column compression in the off-processor part of the matrix a->B, 2164 the number of columns in a->B and b->B may be different, hence we cannot call 2165 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2166 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2167 then copying the submatrices */ 2168 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2169 } else { 2170 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2171 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatSetUp_MPIAIJ" 2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2179 { 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatAXPY_MPIAIJ" 2189 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2190 { 2191 PetscErrorCode ierr; 2192 PetscInt i; 2193 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2194 PetscBLASInt bnz,one=1; 2195 Mat_SeqAIJ *x,*y; 2196 2197 PetscFunctionBegin; 2198 if (str == SAME_NONZERO_PATTERN) { 2199 PetscScalar alpha = a; 2200 x = (Mat_SeqAIJ *)xx->A->data; 2201 y = (Mat_SeqAIJ *)yy->A->data; 2202 bnz = PetscBLASIntCast(x->nz); 2203 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2204 x = (Mat_SeqAIJ *)xx->B->data; 2205 y = (Mat_SeqAIJ *)yy->B->data; 2206 bnz = PetscBLASIntCast(x->nz); 2207 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2208 } else if (str == SUBSET_NONZERO_PATTERN) { 2209 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2210 2211 x = (Mat_SeqAIJ *)xx->B->data; 2212 y = (Mat_SeqAIJ *)yy->B->data; 2213 if (y->xtoy && y->XtoY != xx->B) { 2214 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2215 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2216 } 2217 if (!y->xtoy) { /* get xtoy */ 2218 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2219 y->XtoY = xx->B; 2220 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2221 } 2222 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2223 } else { 2224 Mat B; 2225 PetscInt *nnz_d,*nnz_o; 2226 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2227 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2228 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2229 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2230 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2231 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2234 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2235 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2236 ierr = MatHeaderReplace(Y,B); 2237 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2238 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2239 } 2240 PetscFunctionReturn(0); 2241 } 2242 2243 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatConjugate_MPIAIJ" 2247 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2248 { 2249 #if defined(PETSC_USE_COMPLEX) 2250 PetscErrorCode ierr; 2251 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2252 2253 PetscFunctionBegin; 2254 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2255 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2256 #else 2257 PetscFunctionBegin; 2258 #endif 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 
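/*
   A minimal usage sketch for the MatAXPY_MPIAIJ() routine above (illustration only,
   not part of the library source; the names X, Y and alpha are placeholders).  The
   MatStructure argument selects the branch taken: SAME_NONZERO_PATTERN applies a raw
   BLAS axpy to the stored values of the diagonal and off-diagonal blocks,
   SUBSET_NONZERO_PATTERN builds or reuses the xtoy index map for the off-diagonal
   block, and any other value assembles a fresh matrix with merged preallocation and
   replaces Y's header.

       Mat         X,Y;            two assembled MATMPIAIJ matrices of identical sizes
       PetscScalar alpha = 2.0;
       ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   computes Y = Y + alpha*X

   If the caller knows X's nonzero pattern is contained in (or identical to) Y's,
   passing SUBSET_NONZERO_PATTERN (or SAME_NONZERO_PATTERN) avoids the reallocation
   performed by the general branch.
*/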
#define __FUNCT__ "MatRealPart_MPIAIJ" 2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2265 { 2266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 2269 PetscFunctionBegin; 2270 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2271 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2278 { 2279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2284 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #ifdef PETSC_HAVE_PBGL 2289 2290 #include <boost/parallel/mpi/bsp_process_group.hpp> 2291 #include <boost/graph/distributed/ilu_default_graph.hpp> 2292 #include <boost/graph/distributed/ilu_0_block.hpp> 2293 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2294 #include <boost/graph/distributed/petsc/interface.hpp> 2295 #include <boost/multi_array.hpp> 2296 #include <boost/parallel/distributed_property_map->hpp> 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2300 /* 2301 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2302 */ 2303 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2304 { 2305 namespace petsc = boost::distributed::petsc; 2306 2307 namespace graph_dist = boost::graph::distributed; 2308 using boost::graph::distributed::ilu_default::process_group_type; 2309 using boost::graph::ilu_permuted; 2310 2311 PetscBool row_identity, col_identity; 2312 PetscContainer c; 2313 PetscInt m, n, M, N; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2318 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2319 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2320 if (!row_identity || !col_identity) { 2321 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2322 } 2323 2324 process_group_type pg; 2325 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2326 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2327 lgraph_type& level_graph = *lgraph_p; 2328 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2329 2330 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2331 ilu_permuted(level_graph); 2332 2333 /* put together the new matrix */ 2334 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2335 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2336 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2337 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2338 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2339 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 2342 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2343 ierr = PetscContainerSetPointer(c, lgraph_p); 2344 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2345 ierr = PetscContainerDestroy(&c); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2352 
{ 2353 PetscFunctionBegin; 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatSolve_MPIAIJ" 2359 /* 2360 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2361 */ 2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2363 { 2364 namespace graph_dist = boost::graph::distributed; 2365 2366 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2367 lgraph_type* lgraph_p; 2368 PetscContainer c; 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2373 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2374 ierr = VecCopy(b, x);CHKERRQ(ierr); 2375 2376 PetscScalar* array_x; 2377 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2378 PetscInt sx; 2379 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2380 2381 PetscScalar* array_b; 2382 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2383 PetscInt sb; 2384 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2385 2386 lgraph_type& level_graph = *lgraph_p; 2387 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2388 2389 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2390 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2391 ref_x(array_x, boost::extents[num_vertices(graph)]); 2392 2393 typedef boost::iterator_property_map<array_ref_type::iterator, 2394 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2395 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2396 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2397 2398 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2399 2400 PetscFunctionReturn(0); 2401 } 2402 #endif 2403 2404 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2405 PetscInt nzlocal,nsends,nrecvs; 2406 PetscMPIInt *send_rank,*recv_rank; 2407 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2408 PetscScalar *sbuf_a,**rbuf_a; 2409 PetscErrorCode (*Destroy)(Mat); 2410 } Mat_Redundant; 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2414 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2415 { 2416 PetscErrorCode ierr; 2417 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2418 PetscInt i; 2419 2420 PetscFunctionBegin; 2421 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2422 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2423 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2424 for (i=0; i<redund->nrecvs; i++){ 2425 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2426 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2427 } 2428 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2429 ierr = PetscFree(redund);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatDestroy_MatRedundant" 2435 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2436 { 2437 PetscErrorCode ierr; 2438 PetscContainer container; 2439 Mat_Redundant *redund=PETSC_NULL; 2440 2441 PetscFunctionBegin; 2442 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2443 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2444 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2445 A->ops->destroy = redund->Destroy; 2446 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 
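   /* Removing the composition above drops the matrix's reference to the container;
      when that was the last reference, PetscContainerDestroy_MatRedundant() runs and
      frees the cached send/receive buffers.  The original destroy routine saved in
      redund->Destroy was restored just above and is invoked below to finish tearing
      down the matrix. */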
2447 if (A->ops->destroy) { 2448 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2455 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2456 { 2457 PetscMPIInt rank,size; 2458 MPI_Comm comm=((PetscObject)mat)->comm; 2459 PetscErrorCode ierr; 2460 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2461 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2462 PetscInt *rowrange=mat->rmap->range; 2463 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2464 Mat A=aij->A,B=aij->B,C=*matredundant; 2465 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2466 PetscScalar *sbuf_a; 2467 PetscInt nzlocal=a->nz+b->nz; 2468 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2469 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2470 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2471 MatScalar *aworkA,*aworkB; 2472 PetscScalar *vals; 2473 PetscMPIInt tag1,tag2,tag3,imdex; 2474 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2475 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2476 MPI_Status recv_status,*send_status; 2477 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2478 PetscInt **rbuf_j=PETSC_NULL; 2479 PetscScalar **rbuf_a=PETSC_NULL; 2480 Mat_Redundant *redund=PETSC_NULL; 2481 PetscContainer container; 2482 2483 PetscFunctionBegin; 2484 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2485 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2486 2487 if (reuse == MAT_REUSE_MATRIX) { 2488 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2489 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2490 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2491 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2492 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2493 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2494 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2495 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2496 2497 nsends = redund->nsends; 2498 nrecvs = redund->nrecvs; 2499 send_rank = redund->send_rank; 2500 recv_rank = redund->recv_rank; 2501 sbuf_nz = redund->sbuf_nz; 2502 rbuf_nz = redund->rbuf_nz; 2503 sbuf_j = redund->sbuf_j; 2504 sbuf_a = redund->sbuf_a; 2505 rbuf_j = redund->rbuf_j; 2506 rbuf_a = redund->rbuf_a; 2507 } 2508 2509 if (reuse == MAT_INITIAL_MATRIX){ 2510 PetscMPIInt subrank,subsize; 2511 PetscInt nleftover,np_subcomm; 2512 /* get the destination processors' id send_rank, nsends and nrecvs */ 2513 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2514 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2515 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2516 np_subcomm = size/nsubcomm; 2517 nleftover = size - nsubcomm*np_subcomm; 2518 nsends = 0; nrecvs = 0; 2519 for (i=0; i<size; i++){ /* i=rank*/ 2520 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2521 send_rank[nsends] = i; nsends++; 2522 recv_rank[nrecvs++] = i; 2523 } 2524 } 2525 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2526 i = size-nleftover-1; 2527 j = 0; 2528 while (j < nsubcomm - nleftover){ 2529 send_rank[nsends++] = i; 2530 i--; j++; 2531 } 2532 } 2533 2534 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2535 for (i=0; i<nleftover; i++){ 2536 recv_rank[nrecvs++] = size-nleftover+i; 2537 } 2538 } 2539 2540 /* allocate sbuf_j, sbuf_a */ 2541 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2542 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2543 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2544 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2545 2546 /* copy mat's local entries into the buffers */ 2547 if (reuse == MAT_INITIAL_MATRIX){ 2548 rownz_max = 0; 2549 rptr = sbuf_j; 2550 cols = sbuf_j + rend-rstart + 1; 2551 vals = sbuf_a; 2552 rptr[0] = 0; 2553 for (i=0; i<rend-rstart; i++){ 2554 row = i + rstart; 2555 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2556 ncols = nzA + nzB; 2557 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2558 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2559 /* load the column indices for this row into cols */ 2560 lwrite = 0; 2561 for (l=0; l<nzB; l++) { 2562 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2563 vals[lwrite] = aworkB[l]; 2564 cols[lwrite++] = ctmp; 2565 } 2566 } 2567 for (l=0; l<nzA; l++){ 2568 vals[lwrite] = aworkA[l]; 2569 cols[lwrite++] = cstart + cworkA[l]; 2570 } 2571 for (l=0; l<nzB; l++) { 2572 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2573 vals[lwrite] = aworkB[l]; 2574 cols[lwrite++] = ctmp; 2575 } 2576 } 2577 vals += ncols; 2578 cols += ncols; 2579 rptr[i+1] = rptr[i] + ncols; 2580 if (rownz_max < ncols) rownz_max = ncols; 2581 } 2582 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2583 } else { /* only copy matrix values into sbuf_a */ 2584 rptr = sbuf_j; 2585 vals = sbuf_a; 2586 rptr[0] = 0; 2587 for (i=0; i<rend-rstart; i++){ 2588 row = i + rstart; 2589 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2590 ncols = nzA + nzB; 2591 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2592 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2593 lwrite = 0; 2594 for (l=0; l<nzB; l++) { 2595 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2596 } 2597 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2598 for 
(l=0; l<nzB; l++) { 2599 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2600 } 2601 vals += ncols; 2602 rptr[i+1] = rptr[i] + ncols; 2603 } 2604 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2605 2606 /* send nzlocal to others, and recv other's nzlocal */ 2607 /*--------------------------------------------------*/ 2608 if (reuse == MAT_INITIAL_MATRIX){ 2609 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2610 s_waits2 = s_waits3 + nsends; 2611 s_waits1 = s_waits2 + nsends; 2612 r_waits1 = s_waits1 + nsends; 2613 r_waits2 = r_waits1 + nrecvs; 2614 r_waits3 = r_waits2 + nrecvs; 2615 } else { 2616 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2617 r_waits3 = s_waits3 + nsends; 2618 } 2619 2620 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2621 if (reuse == MAT_INITIAL_MATRIX){ 2622 /* get new tags to keep the communication clean */ 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2624 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2625 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2626 2627 /* post receives of other's nzlocal */ 2628 for (i=0; i<nrecvs; i++){ 2629 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2630 } 2631 /* send nzlocal to others */ 2632 for (i=0; i<nsends; i++){ 2633 sbuf_nz[i] = nzlocal; 2634 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2635 } 2636 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2637 count = nrecvs; 2638 while (count) { 2639 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2640 recv_rank[imdex] = recv_status.MPI_SOURCE; 2641 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2642 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2643 2644 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2645 rbuf_nz[imdex] += i + 2; 2646 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2647 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2648 count--; 2649 } 2650 /* wait on sends of nzlocal */ 2651 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2652 /* send mat->i,j to others, and recv from other's */ 2653 /*------------------------------------------------*/ 2654 for (i=0; i<nsends; i++){ 2655 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2656 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2657 } 2658 /* wait on receives of mat->i,j */ 2659 /*------------------------------*/ 2660 count = nrecvs; 2661 while (count) { 2662 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2663 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2664 count--; 2665 } 2666 /* wait on sends of mat->i,j */ 2667 /*---------------------------*/ 2668 if (nsends) { 2669 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2670 } 2671 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2672 2673 /* post receives, send and receive mat->a */ 2674 
/*----------------------------------------*/ 2675 for (imdex=0; imdex<nrecvs; imdex++) { 2676 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2677 } 2678 for (i=0; i<nsends; i++){ 2679 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2680 } 2681 count = nrecvs; 2682 while (count) { 2683 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2684 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2685 count--; 2686 } 2687 if (nsends) { 2688 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2689 } 2690 2691 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2692 2693 /* create redundant matrix */ 2694 /*-------------------------*/ 2695 if (reuse == MAT_INITIAL_MATRIX){ 2696 /* compute rownz_max for preallocation */ 2697 for (imdex=0; imdex<nrecvs; imdex++){ 2698 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2699 rptr = rbuf_j[imdex]; 2700 for (i=0; i<j; i++){ 2701 ncols = rptr[i+1] - rptr[i]; 2702 if (rownz_max < ncols) rownz_max = ncols; 2703 } 2704 } 2705 2706 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2707 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2708 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2709 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2710 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2711 } else { 2712 C = *matredundant; 2713 } 2714 2715 /* insert local matrix entries */ 2716 rptr = sbuf_j; 2717 cols = sbuf_j + rend-rstart + 1; 2718 vals = sbuf_a; 2719 for (i=0; i<rend-rstart; i++){ 2720 row = i + rstart; 2721 ncols = rptr[i+1] - rptr[i]; 2722 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2723 vals += ncols; 2724 cols += ncols; 2725 } 2726 /* insert received matrix entries */ 2727 for (imdex=0; imdex<nrecvs; imdex++){ 2728 rstart = rowrange[recv_rank[imdex]]; 2729 rend = rowrange[recv_rank[imdex]+1]; 2730 rptr = rbuf_j[imdex]; 2731 cols = rbuf_j[imdex] + rend-rstart + 1; 2732 vals = rbuf_a[imdex]; 2733 for (i=0; i<rend-rstart; i++){ 2734 row = i + rstart; 2735 ncols = rptr[i+1] - rptr[i]; 2736 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2737 vals += ncols; 2738 cols += ncols; 2739 } 2740 } 2741 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2744 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2745 if (reuse == MAT_INITIAL_MATRIX) { 2746 PetscContainer container; 2747 *matredundant = C; 2748 /* create a supporting struct and attach it to C for reuse */ 2749 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2750 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2751 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2752 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2753 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2754 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2755 2756 redund->nzlocal = nzlocal; 2757 redund->nsends = nsends; 2758 redund->nrecvs = nrecvs; 2759 
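   /* Stash the remaining buffers as well: ownership passes to the container, so a
      later MAT_REUSE_MATRIX call can reuse them and they are freed only by
      PetscContainerDestroy_MatRedundant(). */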
redund->send_rank = send_rank; 2760 redund->recv_rank = recv_rank; 2761 redund->sbuf_nz = sbuf_nz; 2762 redund->rbuf_nz = rbuf_nz; 2763 redund->sbuf_j = sbuf_j; 2764 redund->sbuf_a = sbuf_a; 2765 redund->rbuf_j = rbuf_j; 2766 redund->rbuf_a = rbuf_a; 2767 2768 redund->Destroy = C->ops->destroy; 2769 C->ops->destroy = MatDestroy_MatRedundant; 2770 } 2771 PetscFunctionReturn(0); 2772 } 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2776 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2777 { 2778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2779 PetscErrorCode ierr; 2780 PetscInt i,*idxb = 0; 2781 PetscScalar *va,*vb; 2782 Vec vtmp; 2783 2784 PetscFunctionBegin; 2785 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2786 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2787 if (idx) { 2788 for (i=0; i<A->rmap->n; i++) { 2789 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2790 } 2791 } 2792 2793 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2794 if (idx) { 2795 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2796 } 2797 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2798 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2799 2800 for (i=0; i<A->rmap->n; i++){ 2801 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2802 va[i] = vb[i]; 2803 if (idx) idx[i] = a->garray[idxb[i]]; 2804 } 2805 } 2806 2807 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2808 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2809 ierr = PetscFree(idxb);CHKERRQ(ierr); 2810 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2816 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2817 { 2818 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2819 PetscErrorCode ierr; 2820 PetscInt i,*idxb = 0; 2821 PetscScalar *va,*vb; 2822 Vec vtmp; 2823 2824 PetscFunctionBegin; 2825 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2826 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2827 if (idx) { 2828 for (i=0; i<A->cmap->n; i++) { 2829 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2830 } 2831 } 2832 2833 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2834 if (idx) { 2835 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2836 } 2837 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2838 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2839 2840 for (i=0; i<A->rmap->n; i++){ 2841 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2842 va[i] = vb[i]; 2843 if (idx) idx[i] = a->garray[idxb[i]]; 2844 } 2845 } 2846 2847 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2848 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2849 ierr = PetscFree(idxb);CHKERRQ(ierr); 2850 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2856 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2857 { 2858 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2859 PetscInt n = A->rmap->n; 2860 PetscInt cstart = A->cmap->rstart; 2861 PetscInt *cmap = mat->garray; 2862 PetscInt *diagIdx, *offdiagIdx; 2863 Vec diagV, offdiagV; 2864 PetscScalar *a, *diagA, *offdiagA; 2865 PetscInt r; 2866 PetscErrorCode ierr; 2867 2868 PetscFunctionBegin; 2869 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2870 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2871 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2872 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2873 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2874 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2875 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2876 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2877 for(r = 0; r < n; ++r) { 2878 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2879 a[r] = diagA[r]; 2880 idx[r] = cstart + diagIdx[r]; 2881 } else { 2882 a[r] = offdiagA[r]; 2883 idx[r] = cmap[offdiagIdx[r]]; 2884 } 2885 } 2886 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2887 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2889 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2890 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2891 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2892 PetscFunctionReturn(0); 2893 } 2894 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2897 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2898 { 2899 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2900 PetscInt n = A->rmap->n; 2901 PetscInt cstart = A->cmap->rstart; 2902 PetscInt *cmap = mat->garray; 2903 PetscInt *diagIdx, *offdiagIdx; 2904 Vec diagV, offdiagV; 2905 PetscScalar *a, *diagA, *offdiagA; 2906 PetscInt r; 2907 PetscErrorCode ierr; 2908 2909 PetscFunctionBegin; 2910 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2911 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2912 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2913 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2914 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2915 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2916 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2917 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2918 for(r = 0; r < n; ++r) { 2919 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2920 a[r] = diagA[r]; 2921 idx[r] = cstart + diagIdx[r]; 2922 } else { 2923 a[r] = offdiagA[r]; 2924 idx[r] = cmap[offdiagIdx[r]]; 2925 } 2926 } 2927 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2930 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2931 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2932 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2938 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2939 { 2940 PetscErrorCode ierr; 2941 Mat *dummy; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2945 *newmat = *dummy; 2946 ierr = PetscFree(dummy);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2954 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2955 { 2956 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2957 PetscErrorCode ierr; 2958 2959 PetscFunctionBegin; 2960 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 2965 /* -------------------------------------------------------------------*/ 2966 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2967 MatGetRow_MPIAIJ, 2968 MatRestoreRow_MPIAIJ, 2969 MatMult_MPIAIJ, 2970 /* 4*/ MatMultAdd_MPIAIJ, 2971 MatMultTranspose_MPIAIJ, 2972 MatMultTransposeAdd_MPIAIJ, 2973 #ifdef PETSC_HAVE_PBGL 2974 MatSolve_MPIAIJ, 2975 #else 2976 0, 2977 #endif 2978 0, 2979 0, 2980 /*10*/ 0, 2981 0, 2982 0, 2983 MatSOR_MPIAIJ, 2984 MatTranspose_MPIAIJ, 2985 /*15*/ MatGetInfo_MPIAIJ, 2986 MatEqual_MPIAIJ, 2987 MatGetDiagonal_MPIAIJ, 2988 MatDiagonalScale_MPIAIJ, 2989 MatNorm_MPIAIJ, 2990 /*20*/ MatAssemblyBegin_MPIAIJ, 2991 MatAssemblyEnd_MPIAIJ, 2992 MatSetOption_MPIAIJ, 2993 MatZeroEntries_MPIAIJ, 2994 /*24*/ MatZeroRows_MPIAIJ, 2995 0, 2996 #ifdef PETSC_HAVE_PBGL 2997 0, 2998 #else 2999 0, 3000 #endif 3001 0, 3002 0, 3003 /*29*/ MatSetUp_MPIAIJ, 3004 #ifdef PETSC_HAVE_PBGL 3005 0, 3006 #else 3007 0, 3008 #endif 3009 0, 3010 0, 3011 0, 3012 /*34*/ MatDuplicate_MPIAIJ, 3013 0, 3014 0, 3015 0, 3016 0, 3017 /*39*/ MatAXPY_MPIAIJ, 3018 MatGetSubMatrices_MPIAIJ, 3019 MatIncreaseOverlap_MPIAIJ, 3020 MatGetValues_MPIAIJ, 3021 MatCopy_MPIAIJ, 3022 /*44*/ MatGetRowMax_MPIAIJ, 3023 MatScale_MPIAIJ, 3024 0, 3025 0, 3026 MatZeroRowsColumns_MPIAIJ, 3027 /*49*/ MatSetBlockSize_MPIAIJ, 3028 0, 3029 0, 3030 0, 3031 0, 3032 /*54*/ MatFDColoringCreate_MPIAIJ, 3033 0, 3034 MatSetUnfactored_MPIAIJ, 3035 MatPermute_MPIAIJ, 3036 0, 3037 /*59*/ MatGetSubMatrix_MPIAIJ, 3038 MatDestroy_MPIAIJ, 3039 MatView_MPIAIJ, 3040 0, 3041 0, 3042 /*64*/ 0, 3043 0, 3044 0, 3045 0, 3046 0, 3047 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3048 MatGetRowMinAbs_MPIAIJ, 3049 0, 3050 MatSetColoring_MPIAIJ, 3051 #if defined(PETSC_HAVE_ADIC) 3052 MatSetValuesAdic_MPIAIJ, 3053 #else 3054 0, 3055 #endif 3056 MatSetValuesAdifor_MPIAIJ, 3057 /*75*/ MatFDColoringApply_AIJ, 3058 0, 3059 0, 3060 0, 3061 0, 3062 /*80*/ 0, 3063 0, 3064 0, 3065 /*83*/ MatLoad_MPIAIJ, 3066 0, 3067 0, 3068 0, 3069 0, 3070 0, 3071 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3072 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3073 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3074 MatPtAP_Basic, 3075 MatPtAPSymbolic_MPIAIJ, 3076 /*94*/ MatPtAPNumeric_MPIAIJ, 3077 0, 3078 0, 3079 0, 3080 0, 3081 /*99*/ 0, 3082 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3083 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3084 MatConjugate_MPIAIJ, 3085 0, 3086 /*104*/MatSetValuesRow_MPIAIJ, 3087 MatRealPart_MPIAIJ, 3088 MatImaginaryPart_MPIAIJ, 3089 0, 3090 0, 3091 /*109*/0, 3092 MatGetRedundantMatrix_MPIAIJ, 3093 MatGetRowMin_MPIAIJ, 3094 0, 3095 0, 3096 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3097 0, 3098 0, 3099 0, 3100 0, 3101 /*119*/0, 3102 0, 3103 0, 3104 0, 3105 MatGetMultiProcBlock_MPIAIJ, 3106 /*124*/MatFindNonZeroRows_MPIAIJ, 3107 MatGetColumnNorms_MPIAIJ, 3108 MatInvertBlockDiagonal_MPIAIJ, 3109 0, 3110 MatGetSubMatricesParallel_MPIAIJ, 3111 /*129*/0, 3112 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3113 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3114 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3115 0, 3116 /*134*/0, 3117 0, 3118 0, 3119 0, 3120 0 3121 }; 3122 3123 /* ----------------------------------------------------------------------------------------*/ 3124 3125 EXTERN_C_BEGIN 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3128 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3129 { 3130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3131 PetscErrorCode ierr; 3132 3133 PetscFunctionBegin; 3134 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3135 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3136 PetscFunctionReturn(0); 3137 } 3138 EXTERN_C_END 3139 3140 EXTERN_C_BEGIN 3141 #undef __FUNCT__ 3142 #define 
__FUNCT__ "MatRetrieveValues_MPIAIJ" 3143 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3144 { 3145 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3146 PetscErrorCode ierr; 3147 3148 PetscFunctionBegin; 3149 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3150 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3151 PetscFunctionReturn(0); 3152 } 3153 EXTERN_C_END 3154 3155 EXTERN_C_BEGIN 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3158 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3159 { 3160 Mat_MPIAIJ *b; 3161 PetscErrorCode ierr; 3162 PetscInt i; 3163 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3164 3165 PetscFunctionBegin; 3166 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3167 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3168 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3169 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3170 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3171 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3172 3173 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3174 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3175 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3176 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3177 if (d_nnz) { 3178 for (i=0; i<B->rmap->n; i++) { 3179 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3180 } 3181 } 3182 if (o_nnz) { 3183 for (i=0; i<B->rmap->n; i++) { 3184 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3185 } 3186 } 3187 b = (Mat_MPIAIJ*)B->data; 3188 3189 if (!B->preallocated) { 3190 /* Explicitly create 2 MATSEQAIJ matrices. */ 3191 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3192 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3193 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3194 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3195 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3196 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3197 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3198 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3199 } 3200 3201 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3202 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3203 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3204 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3205 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3206 B->preallocated = PETSC_TRUE; 3207 PetscFunctionReturn(0); 3208 } 3209 EXTERN_C_END 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3213 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3214 { 3215 Mat mat; 3216 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 *newmat = 0; 3221 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3222 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3223 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3224 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3225 a = (Mat_MPIAIJ*)mat->data; 3226 3227 mat->factortype = matin->factortype; 3228 mat->rmap->bs = matin->rmap->bs; 3229 mat->assembled = PETSC_TRUE; 3230 mat->insertmode = NOT_SET_VALUES; 3231 mat->preallocated = PETSC_TRUE; 3232 3233 a->size = oldmat->size; 3234 a->rank = oldmat->rank; 3235 a->donotstash = oldmat->donotstash; 3236 a->roworiented = oldmat->roworiented; 3237 a->rowindices = 0; 3238 a->rowvalues = 0; 3239 a->getrowactive = PETSC_FALSE; 3240 3241 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3242 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3243 3244 if (oldmat->colmap) { 3245 #if defined (PETSC_USE_CTABLE) 3246 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3247 #else 3248 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3249 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3250 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3251 #endif 3252 } else a->colmap = 0; 3253 if (oldmat->garray) { 3254 PetscInt len; 3255 len = oldmat->B->cmap->n; 3256 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3257 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3258 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3259 } else a->garray = 0; 3260 3261 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3263 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3264 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3265 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3266 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3267 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3268 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3269 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3270 *newmat = mat; 3271 PetscFunctionReturn(0); 3272 } 3273 3274 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatLoad_MPIAIJ" 3278 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3279 { 3280 PetscScalar *vals,*svals; 3281 MPI_Comm comm = ((PetscObject)viewer)->comm; 3282 PetscErrorCode ierr; 3283 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3284 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3285 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3286 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3287 PetscInt cend,cstart,n,*rowners,sizesset=1; 3288 int fd; 3289 3290 PetscFunctionBegin; 3291 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3292 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3293 if (!rank) { 3294 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3295 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3296 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3297 } 3298 3299 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3300 3301 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3302 M = header[1]; N = header[2]; 3303 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3304 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3305 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3306 3307 /* If global sizes are set, check if they are consistent with that given in the file */ 3308 if (sizesset) { 3309 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3310 } 3311 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3312 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3313 3314 /* determine ownership of all rows */ 3315 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3316 else m = newMat->rmap->n; /* Set by user */ 3317 3318 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3319 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3320 3321 /* First process needs enough room for process with most rows */ 3322 if (!rank) { 3323 mmax = rowners[1]; 3324 for (i=2; i<size; i++) { 3325 mmax = PetscMax(mmax,rowners[i]); 3326 } 3327 } else mmax = m; 3328 3329 rowners[0] = 0; 3330 for (i=2; i<=size; i++) { 3331 rowners[i] += rowners[i-1]; 3332 } 3333 rstart = rowners[rank]; 3334 rend = rowners[rank+1]; 3335 3336 /* distribute row lengths to all processors */ 3337 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3338 if (!rank) { 3339 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3340 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3341 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3342 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3343 for (j=0; j<m; j++) { 3344 procsnz[0] += ourlens[j]; 3345 } 3346 for (i=1; i<size; i++) { 3347 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3348 /* calculate the number of nonzeros on each processor */ 3349 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3350 procsnz[i] += rowlengths[j]; 3351 } 3352 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3353 } 3354 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3355 } else { 3356 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3357 } 3358 3359 if (!rank) { 3360 /* determine max buffer needed and allocate it */ 3361 maxnz = 0; 3362 for (i=0; i<size; i++) { 3363 maxnz = PetscMax(maxnz,procsnz[i]); 3364 } 3365 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3366 3367 /* read in my part of the matrix column indices */ 3368 
nz = procsnz[0]; 3369 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3370 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3371 3372 /* read in every one elses and ship off */ 3373 for (i=1; i<size; i++) { 3374 nz = procsnz[i]; 3375 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3376 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3377 } 3378 ierr = PetscFree(cols);CHKERRQ(ierr); 3379 } else { 3380 /* determine buffer space needed for message */ 3381 nz = 0; 3382 for (i=0; i<m; i++) { 3383 nz += ourlens[i]; 3384 } 3385 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3386 3387 /* receive message of column indices*/ 3388 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3389 } 3390 3391 /* determine column ownership if matrix is not square */ 3392 if (N != M) { 3393 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3394 else n = newMat->cmap->n; 3395 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3396 cstart = cend - n; 3397 } else { 3398 cstart = rstart; 3399 cend = rend; 3400 n = cend - cstart; 3401 } 3402 3403 /* loop over local rows, determining number of off diagonal entries */ 3404 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3405 jj = 0; 3406 for (i=0; i<m; i++) { 3407 for (j=0; j<ourlens[i]; j++) { 3408 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3409 jj++; 3410 } 3411 } 3412 3413 for (i=0; i<m; i++) { 3414 ourlens[i] -= offlens[i]; 3415 } 3416 if (!sizesset) { 3417 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3418 } 3419 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3420 3421 for (i=0; i<m; i++) { 3422 ourlens[i] += offlens[i]; 3423 } 3424 3425 if (!rank) { 3426 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3427 3428 /* read in my part of the matrix numerical values */ 3429 nz = procsnz[0]; 3430 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3431 3432 /* insert into matrix */ 3433 jj = rstart; 3434 smycols = mycols; 3435 svals = vals; 3436 for (i=0; i<m; i++) { 3437 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3438 smycols += ourlens[i]; 3439 svals += ourlens[i]; 3440 jj++; 3441 } 3442 3443 /* read in other processors and ship out */ 3444 for (i=1; i<size; i++) { 3445 nz = procsnz[i]; 3446 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3447 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3448 } 3449 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3450 } else { 3451 /* receive numeric values */ 3452 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3453 3454 /* receive message of values*/ 3455 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3456 3457 /* insert into matrix */ 3458 jj = rstart; 3459 smycols = mycols; 3460 svals = vals; 3461 for (i=0; i<m; i++) { 3462 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3463 smycols += ourlens[i]; 3464 svals += ourlens[i]; 3465 jj++; 3466 } 3467 } 3468 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3469 ierr = PetscFree(vals);CHKERRQ(ierr); 3470 ierr = PetscFree(mycols);CHKERRQ(ierr); 3471 ierr = PetscFree(rowners);CHKERRQ(ierr); 3472 3473 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3474 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 PetscFunctionReturn(0); 3476 } 3477 
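/*
   Example (a minimal sketch, not part of the original source; "matrix.dat" is a placeholder
   file name): loading a matrix stored in PETSc binary format into an AIJ matrix, which on a
   communicator with more than one process dispatches to MatLoad_MPIAIJ() above.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/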
3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3480 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3481 { 3482 PetscErrorCode ierr; 3483 IS iscol_local; 3484 PetscInt csize; 3485 3486 PetscFunctionBegin; 3487 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3488 if (call == MAT_REUSE_MATRIX) { 3489 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3490 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3491 } else { 3492 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3493 } 3494 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3495 if (call == MAT_INITIAL_MATRIX) { 3496 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3497 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3498 } 3499 PetscFunctionReturn(0); 3500 } 3501 3502 #undef __FUNCT__ 3503 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3504 /* 3505 Not great since it makes two copies of the submatrix, first an SeqAIJ 3506 in local and then by concatenating the local matrices the end result. 3507 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3508 3509 Note: This requires a sequential iscol with all indices. 3510 */ 3511 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3512 { 3513 PetscErrorCode ierr; 3514 PetscMPIInt rank,size; 3515 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3516 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3517 Mat *local,M,Mreuse; 3518 MatScalar *vwork,*aa; 3519 MPI_Comm comm = ((PetscObject)mat)->comm; 3520 Mat_SeqAIJ *aij; 3521 3522 3523 PetscFunctionBegin; 3524 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3525 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3526 3527 if (call == MAT_REUSE_MATRIX) { 3528 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3529 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3530 local = &Mreuse; 3531 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3532 } else { 3533 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3534 Mreuse = *local; 3535 ierr = PetscFree(local);CHKERRQ(ierr); 3536 } 3537 3538 /* 3539 m - number of local rows 3540 n - number of columns (same on all processors) 3541 rstart - first row in new global matrix generated 3542 */ 3543 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3544 if (call == MAT_INITIAL_MATRIX) { 3545 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3546 ii = aij->i; 3547 jj = aij->j; 3548 3549 /* 3550 Determine the number of non-zeros in the diagonal and off-diagonal 3551 portions of the matrix in order to do correct preallocation 3552 */ 3553 3554 /* first get start and end of "diagonal" columns */ 3555 if (csize == PETSC_DECIDE) { 3556 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3557 if (mglobal == n) { /* square matrix */ 3558 nlocal = m; 3559 } else { 3560 nlocal = n/size + ((n % size) > rank); 3561 } 3562 } else { 3563 nlocal = csize; 3564 } 3565 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3566 rstart = rend - nlocal; 3567 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to 
total number of columns %D",rend,n);
3568
3569     /* next, compute all the lengths */
3570     ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3571     olens = dlens + m;
3572     for (i=0; i<m; i++) {
3573       jend = ii[i+1] - ii[i];
3574       olen = 0;
3575       dlen = 0;
3576       for (j=0; j<jend; j++) {
3577         if (*jj < rstart || *jj >= rend) olen++;
3578         else dlen++;
3579         jj++;
3580       }
3581       olens[i] = olen;
3582       dlens[i] = dlen;
3583     }
3584     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3585     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3586     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3587     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3588     ierr = PetscFree(dlens);CHKERRQ(ierr);
3589   } else {
3590     PetscInt ml,nl;
3591
3592     M = *newmat;
3593     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3594     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3595     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3596     /*
3597       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3598       rather than the slower MatSetValues().
3599     */
3600     M->was_assembled = PETSC_TRUE;
3601     M->assembled     = PETSC_FALSE;
3602   }
3603   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3604   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3605   ii = aij->i;
3606   jj = aij->j;
3607   aa = aij->a;
3608   for (i=0; i<m; i++) {
3609     row = rstart + i;
3610     nz = ii[i+1] - ii[i];
3611     cwork = jj; jj += nz;
3612     vwork = aa; aa += nz;
3613     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3614   }
3615
3616   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   *newmat = M;
3619
3620   /* save submatrix used in processor for next request */
3621   if (call == MAT_INITIAL_MATRIX) {
3622     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3623     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3624   }
3625
3626   PetscFunctionReturn(0);
3627 }
3628
3629 EXTERN_C_BEGIN
3630 #undef __FUNCT__
3631 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3632 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3633 {
3634   PetscInt m,cstart,cend,j,nnz,i,d;
3635   PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3636   const PetscInt *JJ;
3637   PetscScalar *values;
3638   PetscErrorCode ierr;
3639
3640   PetscFunctionBegin;
3641   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3642
3643   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3644   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3647   m      = B->rmap->n;
3648   cstart = B->cmap->rstart;
3649   cend   = B->cmap->rend;
3650   rstart = B->rmap->rstart;
3651
3652   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3653
3654 #if defined(PETSC_USE_DEBUG)
3655   for (i=0; i<m; i++) {
3656     nnz = Ii[i+1]- Ii[i];
3657     JJ  = J + Ii[i];
3658     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3659     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3660     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3661   }
3662
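  /* The loop following the debug-only validation above splits each local row's column indices
     into those inside the owned column range [cstart,cend) (counted in d_nnz) and those outside
     it (counted in o_nnz); these per-row counts are then passed to MatMPIAIJSetPreallocation(). */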
#endif 3663 3664 for (i=0; i<m; i++) { 3665 nnz = Ii[i+1]- Ii[i]; 3666 JJ = J + Ii[i]; 3667 nnz_max = PetscMax(nnz_max,nnz); 3668 d = 0; 3669 for (j=0; j<nnz; j++) { 3670 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3671 } 3672 d_nnz[i] = d; 3673 o_nnz[i] = nnz - d; 3674 } 3675 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3676 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3677 3678 if (v) values = (PetscScalar*)v; 3679 else { 3680 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3681 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3682 } 3683 3684 for (i=0; i<m; i++) { 3685 ii = i + rstart; 3686 nnz = Ii[i+1]- Ii[i]; 3687 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3688 } 3689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 3692 if (!v) { 3693 ierr = PetscFree(values);CHKERRQ(ierr); 3694 } 3695 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3696 PetscFunctionReturn(0); 3697 } 3698 EXTERN_C_END 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3702 /*@ 3703 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3704 (the default parallel PETSc format). 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + B - the matrix 3710 . i - the indices into j for the start of each local row (starts with zero) 3711 . j - the column indices for each local row (starts with zero) 3712 - v - optional values in the matrix 3713 3714 Level: developer 3715 3716 Notes: 3717 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3718 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3719 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3720 3721 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3722 3723 The format which is used for the sparse matrix input, is equivalent to a 3724 row-major ordering.. i.e for the following matrix, the input data expected is 3725 as shown: 3726 3727 1 0 0 3728 2 0 3 P0 3729 ------- 3730 4 5 6 P1 3731 3732 Process0 [P0]: rows_owned=[0,1] 3733 i = {0,1,3} [size = nrow+1 = 2+1] 3734 j = {0,0,2} [size = nz = 6] 3735 v = {1,2,3} [size = nz = 6] 3736 3737 Process1 [P1]: rows_owned=[2] 3738 i = {0,3} [size = nrow+1 = 1+1] 3739 j = {0,1,2} [size = nz = 6] 3740 v = {4,5,6} [size = nz = 6] 3741 3742 .keywords: matrix, aij, compressed row, sparse, parallel 3743 3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3745 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3746 @*/ 3747 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3748 { 3749 PetscErrorCode ierr; 3750 3751 PetscFunctionBegin; 3752 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3753 PetscFunctionReturn(0); 3754 } 3755 3756 #undef __FUNCT__ 3757 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3758 /*@C 3759 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3760 (the default parallel PETSc format). 
For good matrix assembly performance 3761 the user should preallocate the matrix storage by setting the parameters 3762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3763 performance can be increased by more than a factor of 50. 3764 3765 Collective on MPI_Comm 3766 3767 Input Parameters: 3768 + A - the matrix 3769 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3770 (same value is used for all local rows) 3771 . d_nnz - array containing the number of nonzeros in the various rows of the 3772 DIAGONAL portion of the local submatrix (possibly different for each row) 3773 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3774 The size of this array is equal to the number of local rows, i.e 'm'. 3775 For matrices that will be factored, you must leave room for (and set) 3776 the diagonal entry even if it is zero. 3777 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3778 submatrix (same value is used for all local rows). 3779 - o_nnz - array containing the number of nonzeros in the various rows of the 3780 OFF-DIAGONAL portion of the local submatrix (possibly different for 3781 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3782 structure. The size of this array is equal to the number 3783 of local rows, i.e 'm'. 3784 3785 If the *_nnz parameter is given then the *_nz parameter is ignored 3786 3787 The AIJ format (also called the Yale sparse matrix format or 3788 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3789 storage. The stored row and column indices begin with zero. 3790 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3791 3792 The parallel matrix is partitioned such that the first m0 rows belong to 3793 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3794 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3795 3796 The DIAGONAL portion of the local submatrix of a processor can be defined 3797 as the submatrix which is obtained by extraction the part corresponding to 3798 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3799 first row that belongs to the processor, r2 is the last row belonging to 3800 the this processor, and c1-c2 is range of indices of the local part of a 3801 vector suitable for applying the matrix to. This is an mxn matrix. In the 3802 common case of a square matrix, the row and column ranges are the same and 3803 the DIAGONAL part is also square. The remaining portion of the local 3804 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3805 3806 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3807 3808 You can call MatGetInfo() to get information on how effective the preallocation was; 3809 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3810 You can also run with the option -info and look for messages with the string 3811 malloc in them to see if additional memory allocation was needed. 3812 3813 Example usage: 3814 3815 Consider the following 8x8 matrix with 34 non-zero values, that is 3816 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3817 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3818 as follows: 3819 3820 .vb 3821 1 2 0 | 0 3 0 | 0 4 3822 Proc0 0 5 6 | 7 0 0 | 8 0 3823 9 0 10 | 11 0 0 | 12 0 3824 ------------------------------------- 3825 13 0 14 | 15 16 17 | 0 0 3826 Proc1 0 18 0 | 19 20 21 | 0 0 3827 0 0 0 | 22 23 0 | 24 0 3828 ------------------------------------- 3829 Proc2 25 26 27 | 0 0 28 | 29 0 3830 30 0 0 | 31 32 33 | 0 34 3831 .ve 3832 3833 This can be represented as a collection of submatrices as: 3834 3835 .vb 3836 A B C 3837 D E F 3838 G H I 3839 .ve 3840 3841 Where the submatrices A,B,C are owned by proc0, D,E,F are 3842 owned by proc1, G,H,I are owned by proc2. 3843 3844 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3845 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3846 The 'M','N' parameters are 8,8, and have the same values on all procs. 3847 3848 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3849 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3850 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3851 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3852 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3853 matrix, ans [DF] as another SeqAIJ matrix. 3854 3855 When d_nz, o_nz parameters are specified, d_nz storage elements are 3856 allocated for every row of the local diagonal submatrix, and o_nz 3857 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3858 One way to choose d_nz and o_nz is to use the max nonzerors per local 3859 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3860 In this case, the values of d_nz,o_nz are: 3861 .vb 3862 proc0 : dnz = 2, o_nz = 2 3863 proc1 : dnz = 3, o_nz = 2 3864 proc2 : dnz = 1, o_nz = 4 3865 .ve 3866 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3867 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3868 for proc3. i.e we are using 12+15+10=37 storage locations to store 3869 34 values. 3870 3871 When d_nnz, o_nnz parameters are specified, the storage is specified 3872 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3873 In the above case the values for d_nnz,o_nnz are: 3874 .vb 3875 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3876 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3877 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3878 .ve 3879 Here the space allocated is sum of all the above values i.e 34, and 3880 hence pre-allocation is perfect. 3881 3882 Level: intermediate 3883 3884 .keywords: matrix, aij, compressed row, sparse, parallel 3885 3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3887 MPIAIJ, MatGetInfo() 3888 @*/ 3889 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3890 { 3891 PetscErrorCode ierr; 3892 3893 PetscFunctionBegin; 3894 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3895 PetscValidType(B,1); 3896 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3897 PetscFunctionReturn(0); 3898 } 3899 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3902 /*@ 3903 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3904 CSR format the local rows. 
3905 3906 Collective on MPI_Comm 3907 3908 Input Parameters: 3909 + comm - MPI communicator 3910 . m - number of local rows (Cannot be PETSC_DECIDE) 3911 . n - This value should be the same as the local size used in creating the 3912 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3913 calculated if N is given) For square matrices n is almost always m. 3914 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3915 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3916 . i - row indices 3917 . j - column indices 3918 - a - matrix values 3919 3920 Output Parameter: 3921 . mat - the matrix 3922 3923 Level: intermediate 3924 3925 Notes: 3926 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3927 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3928 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3929 3930 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3931 3932 The format which is used for the sparse matrix input, is equivalent to a 3933 row-major ordering.. i.e for the following matrix, the input data expected is 3934 as shown: 3935 3936 1 0 0 3937 2 0 3 P0 3938 ------- 3939 4 5 6 P1 3940 3941 Process0 [P0]: rows_owned=[0,1] 3942 i = {0,1,3} [size = nrow+1 = 2+1] 3943 j = {0,0,2} [size = nz = 6] 3944 v = {1,2,3} [size = nz = 6] 3945 3946 Process1 [P1]: rows_owned=[2] 3947 i = {0,3} [size = nrow+1 = 1+1] 3948 j = {0,1,2} [size = nz = 6] 3949 v = {4,5,6} [size = nz = 6] 3950 3951 .keywords: matrix, aij, compressed row, sparse, parallel 3952 3953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3954 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3955 @*/ 3956 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3957 { 3958 PetscErrorCode ierr; 3959 3960 PetscFunctionBegin; 3961 if (i[0]) { 3962 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3963 } 3964 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3965 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3966 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3967 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3968 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3969 PetscFunctionReturn(0); 3970 } 3971 3972 #undef __FUNCT__ 3973 #define __FUNCT__ "MatCreateMPIAIJ" 3974 /*@C 3975 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3976 (the default parallel PETSc format). For good matrix assembly performance 3977 the user should preallocate the matrix storage by setting the parameters 3978 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3979 performance can be increased by more than a factor of 50. 3980 3981 Collective on MPI_Comm 3982 3983 Input Parameters: 3984 + comm - MPI communicator 3985 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3986 This value should be the same as the local size used in creating the 3987 y vector for the matrix-vector product y = Ax. 3988 . n - This value should be the same as the local size used in creating the 3989 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 3990 calculated if N is given) For square matrices n is almost always m. 3991 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3992 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3993 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3994 (same value is used for all local rows) 3995 . d_nnz - array containing the number of nonzeros in the various rows of the 3996 DIAGONAL portion of the local submatrix (possibly different for each row) 3997 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3998 The size of this array is equal to the number of local rows, i.e 'm'. 3999 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4000 submatrix (same value is used for all local rows). 4001 - o_nnz - array containing the number of nonzeros in the various rows of the 4002 OFF-DIAGONAL portion of the local submatrix (possibly different for 4003 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4004 structure. The size of this array is equal to the number 4005 of local rows, i.e 'm'. 4006 4007 Output Parameter: 4008 . A - the matrix 4009 4010 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4011 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4012 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4013 4014 Notes: 4015 If the *_nnz parameter is given then the *_nz parameter is ignored 4016 4017 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4018 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4019 storage requirements for this matrix. 4020 4021 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4022 processor than it must be used on all processors that share the object for 4023 that argument. 4024 4025 The user MUST specify either the local or global matrix dimensions 4026 (possibly both). 4027 4028 The parallel matrix is partitioned across processors such that the 4029 first m0 rows belong to process 0, the next m1 rows belong to 4030 process 1, the next m2 rows belong to process 2 etc.. where 4031 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4032 values corresponding to [m x N] submatrix. 4033 4034 The columns are logically partitioned with the n0 columns belonging 4035 to 0th partition, the next n1 columns belonging to the next 4036 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4037 4038 The DIAGONAL portion of the local submatrix on any given processor 4039 is the submatrix corresponding to the rows and columns m,n 4040 corresponding to the given processor. i.e diagonal matrix on 4041 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4042 etc. The remaining portion of the local submatrix [m x (N-n)] 4043 constitute the OFF-DIAGONAL portion. The example below better 4044 illustrates this concept. 4045 4046 For a square global matrix we define each processor's diagonal portion 4047 to be its local rows and the corresponding columns (a square submatrix); 4048 each processor's off-diagonal portion encompasses the remainder of the 4049 local matrix (a rectangular submatrix). 4050 4051 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4052 4053 When calling this routine with a single process communicator, a matrix of 4054 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4055 type of communicator, use the construction mechanism: 4056 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4057 4058 By default, this format uses inodes (identical nodes) when possible. 4059 We search for consecutive rows with the same nonzero structure, thereby 4060 reusing matrix information to achieve increased efficiency. 4061 4062 Options Database Keys: 4063 + -mat_no_inode - Do not use inodes 4064 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4065 - -mat_aij_oneindex - Internally use indexing starting at 1 4066 rather than 0. Note that when calling MatSetValues(), 4067 the user still MUST index entries starting at 0! 4068 4069 4070 Example usage: 4071 4072 Consider the following 8x8 matrix with 34 non-zero values, that is 4073 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4074 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4075 as follows: 4076 4077 .vb 4078 1 2 0 | 0 3 0 | 0 4 4079 Proc0 0 5 6 | 7 0 0 | 8 0 4080 9 0 10 | 11 0 0 | 12 0 4081 ------------------------------------- 4082 13 0 14 | 15 16 17 | 0 0 4083 Proc1 0 18 0 | 19 20 21 | 0 0 4084 0 0 0 | 22 23 0 | 24 0 4085 ------------------------------------- 4086 Proc2 25 26 27 | 0 0 28 | 29 0 4087 30 0 0 | 31 32 33 | 0 34 4088 .ve 4089 4090 This can be represented as a collection of submatrices as: 4091 4092 .vb 4093 A B C 4094 D E F 4095 G H I 4096 .ve 4097 4098 Where the submatrices A,B,C are owned by proc0, D,E,F are 4099 owned by proc1, G,H,I are owned by proc2. 4100 4101 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4102 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4103 The 'M','N' parameters are 8,8, and have the same values on all procs. 4104 4105 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4106 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4107 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4108 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4109 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4110 matrix, ans [DF] as another SeqAIJ matrix. 4111 4112 When d_nz, o_nz parameters are specified, d_nz storage elements are 4113 allocated for every row of the local diagonal submatrix, and o_nz 4114 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4115 One way to choose d_nz and o_nz is to use the max nonzerors per local 4116 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4117 In this case, the values of d_nz,o_nz are: 4118 .vb 4119 proc0 : dnz = 2, o_nz = 2 4120 proc1 : dnz = 3, o_nz = 2 4121 proc2 : dnz = 1, o_nz = 4 4122 .ve 4123 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4124 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4125 for proc3. i.e we are using 12+15+10=37 storage locations to store 4126 34 values. 4127 4128 When d_nnz, o_nnz parameters are specified, the storage is specified 4129 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4130 In the above case the values for d_nnz,o_nnz are: 4131 .vb 4132 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4133 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4134 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4135 .ve 4136 Here the space allocated is sum of all the above values i.e 34, and 4137 hence pre-allocation is perfect. 
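   As an illustrative sketch (not part of the original text), process 0 of the layout above could
   create the matrix by passing its local sizes together with the per-row counts listed above:
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      Mat      A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each process passes its own m, n, d_nnz and o_nnz values; M and N are the same on all processes.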
4138 4139 Level: intermediate 4140 4141 .keywords: matrix, aij, compressed row, sparse, parallel 4142 4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4144 MPIAIJ, MatCreateMPIAIJWithArrays() 4145 @*/ 4146 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4147 { 4148 PetscErrorCode ierr; 4149 PetscMPIInt size; 4150 4151 PetscFunctionBegin; 4152 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4153 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4154 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4155 if (size > 1) { 4156 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4157 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4158 } else { 4159 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4160 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4161 } 4162 PetscFunctionReturn(0); 4163 } 4164 4165 #undef __FUNCT__ 4166 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4167 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4168 { 4169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4170 4171 PetscFunctionBegin; 4172 *Ad = a->A; 4173 *Ao = a->B; 4174 *colmap = a->garray; 4175 PetscFunctionReturn(0); 4176 } 4177 4178 #undef __FUNCT__ 4179 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4180 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4181 { 4182 PetscErrorCode ierr; 4183 PetscInt i; 4184 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4185 4186 PetscFunctionBegin; 4187 if (coloring->ctype == IS_COLORING_GLOBAL) { 4188 ISColoringValue *allcolors,*colors; 4189 ISColoring ocoloring; 4190 4191 /* set coloring for diagonal portion */ 4192 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4193 4194 /* set coloring for off-diagonal portion */ 4195 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4196 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4197 for (i=0; i<a->B->cmap->n; i++) { 4198 colors[i] = allcolors[a->garray[i]]; 4199 } 4200 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4201 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4202 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4203 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4204 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4205 ISColoringValue *colors; 4206 PetscInt *larray; 4207 ISColoring ocoloring; 4208 4209 /* set coloring for diagonal portion */ 4210 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4211 for (i=0; i<a->A->cmap->n; i++) { 4212 larray[i] = i + A->cmap->rstart; 4213 } 4214 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4215 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4216 for (i=0; i<a->A->cmap->n; i++) { 4217 colors[i] = coloring->colors[larray[i]]; 4218 } 4219 ierr = PetscFree(larray);CHKERRQ(ierr); 4220 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4221 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4222 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4223 4224 /* set coloring for off-diagonal portion */ 4225 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4226 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4227 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4228 for (i=0; i<a->B->cmap->n; i++) { 4229 colors[i] = coloring->colors[larray[i]]; 4230 } 4231 ierr = PetscFree(larray);CHKERRQ(ierr); 4232 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4233 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4234 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4235 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4236 4237 PetscFunctionReturn(0); 4238 } 4239 4240 #if defined(PETSC_HAVE_ADIC) 4241 #undef __FUNCT__ 4242 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4243 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4244 { 4245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4246 PetscErrorCode ierr; 4247 4248 PetscFunctionBegin; 4249 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4250 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4251 PetscFunctionReturn(0); 4252 } 4253 #endif 4254 4255 #undef __FUNCT__ 4256 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4257 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4258 { 4259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4260 PetscErrorCode ierr; 4261 4262 PetscFunctionBegin; 4263 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4264 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4265 PetscFunctionReturn(0); 4266 } 4267 4268 #undef __FUNCT__ 4269 #define __FUNCT__ "MatMergeSymbolic" 4270 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4271 { 4272 PetscErrorCode ierr; 4273 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4274 PetscInt *indx; 4275 4276 PetscFunctionBegin; 4277 /* This routine will ONLY return MPIAIJ type matrix */ 4278 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4279 if (n == PETSC_DECIDE){ 4280 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4281 } 4282 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4283 rstart -= m; 4284 4285 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4286 for (i=0;i<m;i++) { 4287 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4288 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4289 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4290 } 4291 4292 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4293 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4294 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4295 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4296 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4297 PetscFunctionReturn(0); 4298 } 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "MatMergeNumeric" 4302 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4303 { 4304 PetscErrorCode ierr; 4305 PetscInt m,N,i,rstart,nnz,Ii; 4306 PetscInt *indx; 4307 PetscScalar *values; 4308 4309 PetscFunctionBegin; 4310 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4311 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4312 for (i=0;i<m;i++) { 4313 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4314 Ii = i + rstart; 4315 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4316 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4317 } 4318 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4319 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4320 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4321 PetscFunctionReturn(0); 4322 } 4323 4324 #undef __FUNCT__ 4325 #define __FUNCT__ "MatMerge" 4326 /*@ 4327 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4328 matrices from each processor 4329 4330 Collective on MPI_Comm 4331 4332 Input Parameters: 4333 + comm - the communicators the parallel matrix will live on 4334 . inmat - the input sequential matrices 4335 . n - number of local columns (or PETSC_DECIDE) 4336 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4337 4338 Output Parameter: 4339 . outmat - the parallel matrix generated 4340 4341 Level: advanced 4342 4343 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4344 4345 @*/ 4346 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4347 { 4348 PetscErrorCode ierr; 4349 4350 PetscFunctionBegin; 4351 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4352 if (scall == MAT_INITIAL_MATRIX){ 4353 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4354 } 4355 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4356 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4357 PetscFunctionReturn(0); 4358 } 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatFileSplit" 4362 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4363 { 4364 PetscErrorCode ierr; 4365 PetscMPIInt rank; 4366 PetscInt m,N,i,rstart,nnz; 4367 size_t len; 4368 const PetscInt *indx; 4369 PetscViewer out; 4370 char *name; 4371 Mat B; 4372 const PetscScalar *values; 4373 4374 PetscFunctionBegin; 4375 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4376 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4377 /* Should this be the type of the diagonal block of A? 
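      As written, a MATSEQAIJ matrix is created unconditionally below, so any specialized type
      of A's diagonal block is not preserved in the per-process output files.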
*/ 4378 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4379 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4380 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4381 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4382 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4383 for (i=0;i<m;i++) { 4384 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4385 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4386 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4387 } 4388 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4390 4391 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4392 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4393 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4394 sprintf(name,"%s.%d",outfile,rank); 4395 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4396 ierr = PetscFree(name); 4397 ierr = MatView(B,out);CHKERRQ(ierr); 4398 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4399 ierr = MatDestroy(&B);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4404 #undef __FUNCT__ 4405 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4406 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4407 { 4408 PetscErrorCode ierr; 4409 Mat_Merge_SeqsToMPI *merge; 4410 PetscContainer container; 4411 4412 PetscFunctionBegin; 4413 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4414 if (container) { 4415 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4428 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4429 ierr = PetscFree(merge);CHKERRQ(ierr); 4430 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4431 } 4432 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #include <../src/mat/utils/freespace.h> 4437 #include <petscbt.h> 4438 4439 #undef __FUNCT__ 4440 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4441 /*@C 4442 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4443 matrices from each processor 4444 4445 Collective on MPI_Comm 4446 4447 Input Parameters: 4448 + comm - the communicators the parallel matrix will live on 4449 . seqmat - the input sequential matrices 4450 . m - number of local rows (or PETSC_DECIDE) 4451 . n - number of local columns (or PETSC_DECIDE) 4452 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4453 4454 Output Parameter: 4455 . mpimat - the parallel matrix generated 4456 4457 Level: advanced 4458 4459 Notes: 4460 The dimensions of the sequential matrix in each processor MUST be the same. 4461 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4462 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4463 @*/ 4464 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4465 { 4466 PetscErrorCode ierr; 4467 MPI_Comm comm=((PetscObject)mpimat)->comm; 4468 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4469 PetscMPIInt size,rank,taga,*len_s; 4470 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4471 PetscInt proc,m; 4472 PetscInt **buf_ri,**buf_rj; 4473 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4474 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4475 MPI_Request *s_waits,*r_waits; 4476 MPI_Status *status; 4477 MatScalar *aa=a->a; 4478 MatScalar **abuf_r,*ba_i; 4479 Mat_Merge_SeqsToMPI *merge; 4480 PetscContainer container; 4481 4482 PetscFunctionBegin; 4483 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4484 4485 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4486 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4487 4488 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4489 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4490 4491 bi = merge->bi; 4492 bj = merge->bj; 4493 buf_ri = merge->buf_ri; 4494 buf_rj = merge->buf_rj; 4495 4496 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4497 owners = merge->rowmap->range; 4498 len_s = merge->len_s; 4499 4500 /* send and recv matrix values */ 4501 /*-----------------------------*/ 4502 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4503 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4504 4505 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4506 for (proc=0,k=0; proc<size; proc++){ 4507 if (!len_s[proc]) continue; 4508 i = owners[proc]; 4509 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4510 k++; 4511 } 4512 4513 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4514 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4515 ierr = PetscFree(status);CHKERRQ(ierr); 4516 4517 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4518 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4519 4520 /* insert mat values of mpimat */ 4521 /*----------------------------*/ 4522 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4523 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4524 4525 for (k=0; k<merge->nrecv; k++){ 4526 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4527 nrows = *(buf_ri_k[k]); 4528 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4529 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4530 } 4531 4532 /* set values of ba */ 4533 m = merge->rowmap->n; 4534 for (i=0; i<m; i++) { 4535 arow = owners[rank] + i; 4536 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4537 bnzi = bi[i+1] - bi[i]; 4538 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4539 4540 /* add local non-zero vals of this proc's seqmat into ba */ 4541 anzi = ai[arow+1] - ai[arow]; 4542 aj = a->j + ai[arow]; 4543 aa = a->a + ai[arow]; 4544 nextaj = 0; 4545 for (j=0; nextaj<anzi; j++){ 4546 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4547 ba_i[j] += aa[nextaj++]; 4548 } 4549 } 4550 4551 /* add received vals into ba */ 4552 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4553 /* i-th row */ 4554 if (i == *nextrow[k]) { 4555 anzi = *(nextai[k]+1) - *nextai[k]; 4556 aj = buf_rj[k] + *(nextai[k]); 4557 aa = abuf_r[k] + *(nextai[k]); 4558 nextaj = 0; 4559 for (j=0; nextaj<anzi; j++){ 4560 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4561 ba_i[j] += aa[nextaj++]; 4562 } 4563 } 4564 nextrow[k]++; nextai[k]++; 4565 } 4566 } 4567 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4568 } 4569 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4570 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4571 4572 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4573 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4574 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4575 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4576 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4577 PetscFunctionReturn(0); 4578 } 4579 4580 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4581 4582 #undef __FUNCT__ 4583 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4584 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4585 { 4586 PetscErrorCode ierr; 4587 Mat B_mpi; 4588 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4589 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4590 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4591 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4592 PetscInt len,proc,*dnz,*onz; 4593 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4594 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4595 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4596 MPI_Status *status; 4597 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4598 PetscBT lnkbt; 4599 Mat_Merge_SeqsToMPI *merge; 4600 PetscContainer container; 4601 4602 PetscFunctionBegin; 4603 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4604 4605 /* make sure it is a PETSc comm */ 4606 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4607 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4608 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4609 4610 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4611 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4612 4613 /* determine row ownership */ 4614 /*---------------------------------------------------------*/ 4615 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4616 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4617 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4618 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4619 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4620 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4621 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4622 4623 m = merge->rowmap->n; 4624 M = merge->rowmap->N; 4625 owners = merge->rowmap->range; 4626 4627 /* determine the number of messages to send, their lengths */ 4628 /*---------------------------------------------------------*/ 4629 len_s = merge->len_s; 4630 4631 len = 0; /* length of buf_si[] */ 4632 merge->nsend = 0; 4633 for (proc=0; proc<size; proc++){ 4634 len_si[proc] = 0; 4635 if (proc == rank){ 4636 len_s[proc] = 0; 4637 } else { 4638 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4639 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4640 } 4641 if (len_s[proc]) { 4642 
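/* a nonzero-length message goes to [proc]: count it, and count the nonempty rows that determine the length of its i-structure */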
merge->nsend++; 4643 nrows = 0; 4644 for (i=owners[proc]; i<owners[proc+1]; i++){ 4645 if (ai[i+1] > ai[i]) nrows++; 4646 } 4647 len_si[proc] = 2*(nrows+1); 4648 len += len_si[proc]; 4649 } 4650 } 4651 4652 /* determine the number and length of messages to receive for ij-structure */ 4653 /*-------------------------------------------------------------------------*/ 4654 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4655 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4656 4657 /* post the Irecv of j-structure */ 4658 /*-------------------------------*/ 4659 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4660 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4661 4662 /* post the Isend of j-structure */ 4663 /*--------------------------------*/ 4664 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4665 4666 for (proc=0, k=0; proc<size; proc++){ 4667 if (!len_s[proc]) continue; 4668 i = owners[proc]; 4669 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4670 k++; 4671 } 4672 4673 /* receives and sends of j-structure are complete */ 4674 /*------------------------------------------------*/ 4675 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4676 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4677 4678 /* send and recv i-structure */ 4679 /*---------------------------*/ 4680 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4681 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4682 4683 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4684 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4685 for (proc=0,k=0; proc<size; proc++){ 4686 if (!len_s[proc]) continue; 4687 /* form outgoing message for i-structure: 4688 buf_si[0]: nrows to be sent 4689 [1:nrows]: row index (global) 4690 [nrows+1:2*nrows+1]: i-structure index 4691 */ 4692 /*-------------------------------------------*/ 4693 nrows = len_si[proc]/2 - 1; 4694 buf_si_i = buf_si + nrows+1; 4695 buf_si[0] = nrows; 4696 buf_si_i[0] = 0; 4697 nrows = 0; 4698 for (i=owners[proc]; i<owners[proc+1]; i++){ 4699 anzi = ai[i+1] - ai[i]; 4700 if (anzi) { 4701 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4702 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4703 nrows++; 4704 } 4705 } 4706 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4707 k++; 4708 buf_si += len_si[proc]; 4709 } 4710 4711 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4712 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4713 4714 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4715 for (i=0; i<merge->nrecv; i++){ 4716 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4717 } 4718 4719 ierr = PetscFree(len_si);CHKERRQ(ierr); 4720 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4721 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4722 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4723 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4724 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4725 ierr = 
PetscFree(status);CHKERRQ(ierr);
4726
4727 /* compute a local seq matrix in each processor */
4728 /*----------------------------------------------*/
4729 /* allocate bi array and free space for accumulating nonzero column info */
4730 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4731 bi[0] = 0;
4732
4733 /* create and initialize a linked list */
4734 nlnk = N+1;
4735 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4736
4737 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4738 len = 0;
4739 len = ai[owners[rank+1]] - ai[owners[rank]];
4740 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4741 current_space = free_space;
4742
4743 /* determine symbolic info for each local row */
4744 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4745
4746 for (k=0; k<merge->nrecv; k++){
4747 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4748 nrows = *buf_ri_k[k];
4749 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4750 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4751 }
4752
4753 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4754 len = 0;
4755 for (i=0;i<m;i++) {
4756 bnzi = 0;
4757 /* add local non-zero cols of this proc's seqmat into lnk */
4758 arow = owners[rank] + i;
4759 anzi = ai[arow+1] - ai[arow];
4760 aj = a->j + ai[arow];
4761 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762 bnzi += nlnk;
4763 /* add received col data into lnk */
4764 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4765 if (i == *nextrow[k]) { /* i-th row */
4766 anzi = *(nextai[k]+1) - *nextai[k];
4767 aj = buf_rj[k] + *nextai[k];
4768 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4769 bnzi += nlnk;
4770 nextrow[k]++; nextai[k]++;
4771 }
4772 }
4773 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4774
4775 /* if free space is not available, make more free space */
4776 if (current_space->local_remaining<bnzi) {
4777 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4778 nspacedouble++;
4779 }
4780 /* copy data into free space, then initialize lnk */
4781 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4782 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4783
4784 current_space->array += bnzi;
4785 current_space->local_used += bnzi;
4786 current_space->local_remaining -= bnzi;
4787
4788 bi[i+1] = bi[i] + bnzi;
4789 }
4790
4791 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4792
4793 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4794 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4795 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4796
4797 /* create symbolic parallel matrix B_mpi */
4798 /*---------------------------------------*/
4799 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4800 if (n==PETSC_DECIDE) {
4801 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4802 } else {
4803 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4804 }
4805 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4806 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4807 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4808
4809 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
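/* mark B_mpi as unassembled and install a destroy routine that also frees the merge data attached below */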
4810 B_mpi->assembled = PETSC_FALSE;
4811 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4812 merge->bi = bi;
4813 merge->bj = bj;
4814 merge->buf_ri = buf_ri;
4815 merge->buf_rj = buf_rj;
4816 merge->coi = PETSC_NULL;
4817 merge->coj = PETSC_NULL;
4818 merge->owners_co = PETSC_NULL;
4819
4820 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4821
4822 /* attach the supporting struct to B_mpi for reuse */
4823 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4824 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4825 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4826 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4827 *mpimat = B_mpi;
4828
4829 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4830 PetscFunctionReturn(0);
4831 }
4832
4833 #undef __FUNCT__
4834 #define __FUNCT__ "MatMerge_SeqsToMPI"
4835 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4836 {
4837 PetscErrorCode ierr;
4838
4839 PetscFunctionBegin;
4840 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841 if (scall == MAT_INITIAL_MATRIX){
4842 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4843 }
4844 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4845 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4846 PetscFunctionReturn(0);
4847 }
4848
4849 #undef __FUNCT__
4850 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4851 /*@
4852 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4853 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4854 with MatGetSize()
4855
4856 Not Collective
4857
4858 Input Parameters:
4859 + A - the matrix
4860 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4861
4862 Output Parameter:
4863 .
A_loc - the local sequential matrix generated 4864 4865 Level: developer 4866 4867 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4868 4869 @*/ 4870 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4871 { 4872 PetscErrorCode ierr; 4873 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4874 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4875 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4876 MatScalar *aa=a->a,*ba=b->a,*cam; 4877 PetscScalar *ca; 4878 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4879 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4880 PetscBool match; 4881 4882 PetscFunctionBegin; 4883 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4884 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4885 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4886 if (scall == MAT_INITIAL_MATRIX){ 4887 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4888 ci[0] = 0; 4889 for (i=0; i<am; i++){ 4890 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4891 } 4892 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4893 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4894 k = 0; 4895 for (i=0; i<am; i++) { 4896 ncols_o = bi[i+1] - bi[i]; 4897 ncols_d = ai[i+1] - ai[i]; 4898 /* off-diagonal portion of A */ 4899 for (jo=0; jo<ncols_o; jo++) { 4900 col = cmap[*bj]; 4901 if (col >= cstart) break; 4902 cj[k] = col; bj++; 4903 ca[k++] = *ba++; 4904 } 4905 /* diagonal portion of A */ 4906 for (j=0; j<ncols_d; j++) { 4907 cj[k] = cstart + *aj++; 4908 ca[k++] = *aa++; 4909 } 4910 /* off-diagonal portion of A */ 4911 for (j=jo; j<ncols_o; j++) { 4912 cj[k] = cmap[*bj++]; 4913 ca[k++] = *ba++; 4914 } 4915 } 4916 /* put together the new matrix */ 4917 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4918 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4919 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4920 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4921 mat->free_a = PETSC_TRUE; 4922 mat->free_ij = PETSC_TRUE; 4923 mat->nonew = 0; 4924 } else if (scall == MAT_REUSE_MATRIX){ 4925 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4926 ci = mat->i; cj = mat->j; cam = mat->a; 4927 for (i=0; i<am; i++) { 4928 /* off-diagonal portion of A */ 4929 ncols_o = bi[i+1] - bi[i]; 4930 for (jo=0; jo<ncols_o; jo++) { 4931 col = cmap[*bj]; 4932 if (col >= cstart) break; 4933 *cam++ = *ba++; bj++; 4934 } 4935 /* diagonal portion of A */ 4936 ncols_d = ai[i+1] - ai[i]; 4937 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4938 /* off-diagonal portion of A */ 4939 for (j=jo; j<ncols_o; j++) { 4940 *cam++ = *ba++; bj++; 4941 } 4942 } 4943 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4944 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4945 PetscFunctionReturn(0); 4946 } 4947 4948 #undef __FUNCT__ 4949 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4950 /*@C 4951 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4952 4953 Not Collective 4954 4955 Input Parameters: 4956 + A - the matrix 4957 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4958 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4959 4960 Output Parameter: 4961 . 
A_loc - the local sequential matrix generated 4962 4963 Level: developer 4964 4965 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4966 4967 @*/ 4968 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4969 { 4970 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4971 PetscErrorCode ierr; 4972 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4973 IS isrowa,iscola; 4974 Mat *aloc; 4975 PetscBool match; 4976 4977 PetscFunctionBegin; 4978 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4979 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4980 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4981 if (!row){ 4982 start = A->rmap->rstart; end = A->rmap->rend; 4983 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4984 } else { 4985 isrowa = *row; 4986 } 4987 if (!col){ 4988 start = A->cmap->rstart; 4989 cmap = a->garray; 4990 nzA = a->A->cmap->n; 4991 nzB = a->B->cmap->n; 4992 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4993 ncols = 0; 4994 for (i=0; i<nzB; i++) { 4995 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4996 else break; 4997 } 4998 imark = i; 4999 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5000 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5001 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5002 } else { 5003 iscola = *col; 5004 } 5005 if (scall != MAT_INITIAL_MATRIX){ 5006 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5007 aloc[0] = *A_loc; 5008 } 5009 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5010 *A_loc = aloc[0]; 5011 ierr = PetscFree(aloc);CHKERRQ(ierr); 5012 if (!row){ 5013 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5014 } 5015 if (!col){ 5016 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5017 } 5018 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5019 PetscFunctionReturn(0); 5020 } 5021 5022 #undef __FUNCT__ 5023 #define __FUNCT__ "MatGetBrowsOfAcols" 5024 /*@C 5025 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5026 5027 Collective on Mat 5028 5029 Input Parameters: 5030 + A,B - the matrices in mpiaij format 5031 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5032 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5033 5034 Output Parameter: 5035 + rowb, colb - index sets of rows and columns of B to extract 5036 - B_seq - the sequential matrix generated 5037 5038 Level: developer 5039 5040 @*/ 5041 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5042 { 5043 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5044 PetscErrorCode ierr; 5045 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5046 IS isrowb,iscolb; 5047 Mat *bseq=PETSC_NULL; 5048 5049 PetscFunctionBegin; 5050 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5051 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5052 } 5053 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5054 5055 if (scall == MAT_INITIAL_MATRIX){ 5056 start = A->cmap->rstart; 5057 cmap = a->garray; 5058 nzA = a->A->cmap->n; 5059 nzB = a->B->cmap->n; 5060 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5061 ncols = 0; 5062 for (i=0; i<nzB; i++) { /* row < local row index */ 5063 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5064 else break; 5065 } 5066 imark = i; 5067 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5068 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5069 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5070 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5071 } else { 5072 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5073 isrowb = *rowb; iscolb = *colb; 5074 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5075 bseq[0] = *B_seq; 5076 } 5077 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5078 *B_seq = bseq[0]; 5079 ierr = PetscFree(bseq);CHKERRQ(ierr); 5080 if (!rowb){ 5081 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5082 } else { 5083 *rowb = isrowb; 5084 } 5085 if (!colb){ 5086 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5087 } else { 5088 *colb = iscolb; 5089 } 5090 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5096 /* 5097 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5098 of the OFF-DIAGONAL portion of local A 5099 5100 Collective on Mat 5101 5102 Input Parameters: 5103 + A,B - the matrices in mpiaij format 5104 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5105 5106 Output Parameter: 5107 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5108 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5109 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5110 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5111 5112 Level: developer 5113 5114 */ 5115 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5116 { 5117 VecScatter_MPI_General *gen_to,*gen_from; 5118 PetscErrorCode ierr; 5119 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5120 Mat_SeqAIJ *b_oth; 5121 VecScatter ctx=a->Mvctx; 5122 MPI_Comm comm=((PetscObject)ctx)->comm; 5123 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5124 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5125 PetscScalar *rvalues,*svalues; 5126 MatScalar *b_otha,*bufa,*bufA; 5127 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5128 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5129 MPI_Status *sstatus,rstatus; 5130 PetscMPIInt jj; 5131 PetscInt *cols,sbs,rbs; 5132 PetscScalar *vals; 5133 5134 PetscFunctionBegin; 5135 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5136 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5137 } 5138 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5139 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5140 5141 gen_to = (VecScatter_MPI_General*)ctx->todata; 5142 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5143 rvalues = gen_from->values; /* holds the length of receiving row */ 5144 svalues = gen_to->values; /* holds the length of sending row */ 5145 nrecvs = gen_from->n; 5146 nsends = gen_to->n; 5147 5148 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5149 srow = gen_to->indices; /* local row index to be sent */ 5150 sstarts = gen_to->starts; 5151 sprocs = gen_to->procs; 5152 sstatus = gen_to->sstatus; 5153 sbs = gen_to->bs; 5154 rstarts = gen_from->starts; 5155 rprocs = gen_from->procs; 5156 rbs = gen_from->bs; 5157 5158 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5159 if (scall == MAT_INITIAL_MATRIX){ 5160 /* i-array */ 5161 /*---------*/ 5162 /* post receives */ 5163 for (i=0; i<nrecvs; i++){ 5164 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5165 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5166 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5167 } 5168 5169 /* pack the outgoing message */ 5170 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5171 sstartsj[0] = 0; rstartsj[0] = 0; 5172 len = 0; /* total length of j or a array to be sent */ 5173 k = 0; 5174 for (i=0; i<nsends; i++){ 5175 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5176 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5177 for (j=0; j<nrows; j++) { 5178 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5179 for (l=0; l<sbs; l++){ 5180 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5181 rowlen[j*sbs+l] = ncols; 5182 len += ncols; 5183 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5184 } 5185 k++; 5186 } 5187 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5188 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5189 } 5190 /* 
recvs and sends of i-array are completed */
5191 i = nrecvs;
5192 while (i--) {
5193 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194 }
5195 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196
5197 /* allocate buffers for sending j and a arrays */
5198 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5199 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5200
5201 /* create i-array of B_oth */
5202 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5203 b_othi[0] = 0;
5204 len = 0; /* total length of j or a array to be received */
5205 k = 0;
5206 for (i=0; i<nrecvs; i++){
5207 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5208 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5209 for (j=0; j<nrows; j++) {
5210 b_othi[k+1] = b_othi[k] + rowlen[j];
5211 len += rowlen[j]; k++;
5212 }
5213 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5214 }
5215
5216 /* allocate space for j and a arrays of B_oth */
5217 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5218 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5219
5220 /* j-array */
5221 /*---------*/
5222 /* post receives of j-array */
5223 for (i=0; i<nrecvs; i++){
5224 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5225 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226 }
5227
5228 /* pack the outgoing message j-array */
5229 k = 0;
5230 for (i=0; i<nsends; i++){
5231 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232 bufJ = bufj+sstartsj[i];
5233 for (j=0; j<nrows; j++) {
5234 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5235 for (ll=0; ll<sbs; ll++){
5236 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5237 for (l=0; l<ncols; l++){
5238 *bufJ++ = cols[l];
5239 }
5240 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5241 }
5242 }
5243 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5244 }
5245
5246 /* recvs and sends of j-array are completed */
5247 i = nrecvs;
5248 while (i--) {
5249 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5250 }
5251 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5252 } else if (scall == MAT_REUSE_MATRIX){
5253 sstartsj = *startsj_s;
5254 rstartsj = *startsj_r;
5255 bufa = *bufa_ptr;
5256 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5257 b_otha = b_oth->a;
5258 } else {
5259 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse argument");
5260 }
5261
5262 /* a-array */
5263 /*---------*/
5264 /* post receives of a-array */
5265 for (i=0; i<nrecvs; i++){
5266 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5267 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5268 }
5269
5270 /* pack the outgoing message a-array */
5271 k = 0;
5272 for (i=0; i<nsends; i++){
5273 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274 bufA = bufa+sstartsj[i];
5275 for (j=0; j<nrows; j++) {
5276 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5277 for (ll=0; ll<sbs; ll++){
5278 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5279 for (l=0; l<ncols; l++){
5280 *bufA++ = vals[l];
5281 }
5282 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5283 } 5284 } 5285 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5286 } 5287 /* recvs and sends of a-array are completed */ 5288 i = nrecvs; 5289 while (i--) { 5290 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5291 } 5292 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5293 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5294 5295 if (scall == MAT_INITIAL_MATRIX){ 5296 /* put together the new matrix */ 5297 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5298 5299 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5300 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5301 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5302 b_oth->free_a = PETSC_TRUE; 5303 b_oth->free_ij = PETSC_TRUE; 5304 b_oth->nonew = 0; 5305 5306 ierr = PetscFree(bufj);CHKERRQ(ierr); 5307 if (!startsj_s || !bufa_ptr){ 5308 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5309 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5310 } else { 5311 *startsj_s = sstartsj; 5312 *startsj_r = rstartsj; 5313 *bufa_ptr = bufa; 5314 } 5315 } 5316 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5317 PetscFunctionReturn(0); 5318 } 5319 5320 #undef __FUNCT__ 5321 #define __FUNCT__ "MatGetCommunicationStructs" 5322 /*@C 5323 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5324 5325 Not Collective 5326 5327 Input Parameters: 5328 . A - The matrix in mpiaij format 5329 5330 Output Parameter: 5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5332 . 
colmap - A map from global column index to local index into lvec 5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5334 5335 Level: developer 5336 5337 @*/ 5338 #if defined (PETSC_USE_CTABLE) 5339 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5340 #else 5341 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5342 #endif 5343 { 5344 Mat_MPIAIJ *a; 5345 5346 PetscFunctionBegin; 5347 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5348 PetscValidPointer(lvec, 2); 5349 PetscValidPointer(colmap, 3); 5350 PetscValidPointer(multScatter, 4); 5351 a = (Mat_MPIAIJ *) A->data; 5352 if (lvec) *lvec = a->lvec; 5353 if (colmap) *colmap = a->colmap; 5354 if (multScatter) *multScatter = a->Mvctx; 5355 PetscFunctionReturn(0); 5356 } 5357 5358 EXTERN_C_BEGIN 5359 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5360 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5361 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5362 EXTERN_C_END 5363 5364 #undef __FUNCT__ 5365 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5366 /* 5367 Computes (B'*A')' since computing B*A directly is untenable 5368 5369 n p p 5370 ( ) ( ) ( ) 5371 m ( A ) * n ( B ) = m ( C ) 5372 ( ) ( ) ( ) 5373 5374 */ 5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5376 { 5377 PetscErrorCode ierr; 5378 Mat At,Bt,Ct; 5379 5380 PetscFunctionBegin; 5381 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5382 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5383 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5384 ierr = MatDestroy(&At);CHKERRQ(ierr); 5385 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5386 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5387 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5388 PetscFunctionReturn(0); 5389 } 5390 5391 #undef __FUNCT__ 5392 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5393 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5394 { 5395 PetscErrorCode ierr; 5396 PetscInt m=A->rmap->n,n=B->cmap->n; 5397 Mat Cmat; 5398 5399 PetscFunctionBegin; 5400 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5401 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5402 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5403 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5404 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5405 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5406 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5407 *C = Cmat; 5408 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5409 PetscFunctionReturn(0); 5410 } 5411 5412 /* ----------------------------------------------------------------*/ 5413 #undef __FUNCT__ 5414 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5415 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5416 { 5417 PetscErrorCode ierr; 5418 5419 PetscFunctionBegin; 5420 if (scall == MAT_INITIAL_MATRIX){ 5421 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5422 } 5423 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5424 PetscFunctionReturn(0); 5425 } 5426 5427 EXTERN_C_BEGIN 5428 #if 
defined(PETSC_HAVE_MUMPS) 5429 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5430 #endif 5431 #if defined(PETSC_HAVE_PASTIX) 5432 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5433 #endif 5434 #if defined(PETSC_HAVE_SUPERLU_DIST) 5435 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5436 #endif 5437 #if defined(PETSC_HAVE_SPOOLES) 5438 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5439 #endif 5440 EXTERN_C_END 5441 5442 /*MC 5443 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5444 5445 Options Database Keys: 5446 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5447 5448 Level: beginner 5449 5450 .seealso: MatCreateMPIAIJ() 5451 M*/ 5452 5453 EXTERN_C_BEGIN 5454 #undef __FUNCT__ 5455 #define __FUNCT__ "MatCreate_MPIAIJ" 5456 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5457 { 5458 Mat_MPIAIJ *b; 5459 PetscErrorCode ierr; 5460 PetscMPIInt size; 5461 5462 PetscFunctionBegin; 5463 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5464 5465 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5466 B->data = (void*)b; 5467 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5468 B->rmap->bs = 1; 5469 B->assembled = PETSC_FALSE; 5470 5471 B->insertmode = NOT_SET_VALUES; 5472 b->size = size; 5473 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5474 5475 /* build cache for off array entries formed */ 5476 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5477 b->donotstash = PETSC_FALSE; 5478 b->colmap = 0; 5479 b->garray = 0; 5480 b->roworiented = PETSC_TRUE; 5481 5482 /* stuff used for matrix vector multiply */ 5483 b->lvec = PETSC_NULL; 5484 b->Mvctx = PETSC_NULL; 5485 5486 /* stuff for MatGetRow() */ 5487 b->rowindices = 0; 5488 b->rowvalues = 0; 5489 b->getrowactive = PETSC_FALSE; 5490 5491 #if defined(PETSC_HAVE_SPOOLES) 5492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5493 "MatGetFactor_mpiaij_spooles", 5494 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5495 #endif 5496 #if defined(PETSC_HAVE_MUMPS) 5497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5498 "MatGetFactor_aij_mumps", 5499 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5500 #endif 5501 #if defined(PETSC_HAVE_PASTIX) 5502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5503 "MatGetFactor_mpiaij_pastix", 5504 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5505 #endif 5506 #if defined(PETSC_HAVE_SUPERLU_DIST) 5507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5508 "MatGetFactor_mpiaij_superlu_dist", 5509 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5510 #endif 5511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5512 "MatStoreValues_MPIAIJ", 5513 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5515 "MatRetrieveValues_MPIAIJ", 5516 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5518 "MatGetDiagonalBlock_MPIAIJ", 5519 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5521 "MatIsTranspose_MPIAIJ", 5522 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5523 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5524 "MatMPIAIJSetPreallocation_MPIAIJ", 5525 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5526 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5527 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5528 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5529 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5530 "MatDiagonalScaleLocal_MPIAIJ", 5531 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5533 "MatConvert_MPIAIJ_MPIAIJPERM", 5534 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5535 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5536 "MatConvert_MPIAIJ_MPIAIJCRL", 5537 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5539 "MatConvert_MPIAIJ_MPISBAIJ", 5540 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5541 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5542 "MatMatMult_MPIDense_MPIAIJ", 5543 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5545 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5546 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5547 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5548 "MatMatMultNumeric_MPIDense_MPIAIJ", 5549 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5550 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5551 PetscFunctionReturn(0); 5552 } 5553 EXTERN_C_END 5554 5555 #undef __FUNCT__ 5556 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5557 /*@ 5558 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5559 and "off-diagonal" part of the matrix in CSR format. 5560 5561 Collective on MPI_Comm 5562 5563 Input Parameters: 5564 + comm - MPI communicator 5565 . m - number of local rows (Cannot be PETSC_DECIDE) 5566 . n - This value should be the same as the local size used in creating the 5567 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5568 calculated if N is given) For square matrices n is almost always m. 5569 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5570 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5571 . i - row indices for "diagonal" portion of matrix 5572 . j - column indices 5573 . a - matrix values 5574 . oi - row indices for "off-diagonal" portion of matrix 5575 . oj - column indices 5576 - oa - matrix values 5577 5578 Output Parameter: 5579 . mat - the matrix 5580 5581 Level: advanced 5582 5583 Notes: 5584 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5585 must free the arrays once the matrix has been destroyed and not before. 5586 5587 The i and j indices are 0 based 5588 5589 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5590 5591 This sets local rows and cannot be used to set off-processor values. 5592 5593 You cannot later use MatSetValues() to change values in this matrix. 
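       A minimal usage sketch (the split CSR arrays i,j,a and oi,oj,oa are assumed to have already been
       built by the caller in the layout described above; error handling is abbreviated):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      /* ... use A ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);  /* the caller still owns i,j,a,oi,oj,oa and frees them afterwards */
.ve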
5594 5595 .keywords: matrix, aij, compressed row, sparse, parallel 5596 5597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5598 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5599 @*/ 5600 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5601 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5602 { 5603 PetscErrorCode ierr; 5604 Mat_MPIAIJ *maij; 5605 5606 PetscFunctionBegin; 5607 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5608 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5609 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5610 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5611 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5612 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5613 maij = (Mat_MPIAIJ*) (*mat)->data; 5614 maij->donotstash = PETSC_TRUE; 5615 (*mat)->preallocated = PETSC_TRUE; 5616 5617 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5618 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5619 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5620 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5621 5622 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5623 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5624 5625 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5629 5630 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 PetscFunctionReturn(0); 5633 } 5634 5635 /* 5636 Special version for direct calls from Fortran 5637 */ 5638 #include <private/fortranimpl.h> 5639 5640 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5641 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5642 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5643 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5644 #endif 5645 5646 /* Change these macros so can be used in void function */ 5647 #undef CHKERRQ 5648 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5649 #undef SETERRQ2 5650 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5651 #undef SETERRQ3 5652 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5653 #undef SETERRQ 5654 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5655 5656 EXTERN_C_BEGIN 5657 #undef __FUNCT__ 5658 #define __FUNCT__ "matsetvaluesmpiaij_" 5659 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5660 { 5661 Mat mat = *mmat; 5662 PetscInt m = *mm, n = *mn; 5663 InsertMode addv = *maddv; 5664 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5665 PetscScalar value; 5666 PetscErrorCode ierr; 5667 5668 MatCheckPreallocated(mat,1); 5669 if (mat->insertmode == NOT_SET_VALUES) { 5670 mat->insertmode = addv; 5671 } 5672 #if defined(PETSC_USE_DEBUG) 5673 else if (mat->insertmode != addv) { 5674 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert 
values"); 5675 } 5676 #endif 5677 { 5678 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5679 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5680 PetscBool roworiented = aij->roworiented; 5681 5682 /* Some Variables required in the macro */ 5683 Mat A = aij->A; 5684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5685 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5686 MatScalar *aa = a->a; 5687 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5688 Mat B = aij->B; 5689 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5690 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5691 MatScalar *ba = b->a; 5692 5693 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5694 PetscInt nonew = a->nonew; 5695 MatScalar *ap1,*ap2; 5696 5697 PetscFunctionBegin; 5698 for (i=0; i<m; i++) { 5699 if (im[i] < 0) continue; 5700 #if defined(PETSC_USE_DEBUG) 5701 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5702 #endif 5703 if (im[i] >= rstart && im[i] < rend) { 5704 row = im[i] - rstart; 5705 lastcol1 = -1; 5706 rp1 = aj + ai[row]; 5707 ap1 = aa + ai[row]; 5708 rmax1 = aimax[row]; 5709 nrow1 = ailen[row]; 5710 low1 = 0; 5711 high1 = nrow1; 5712 lastcol2 = -1; 5713 rp2 = bj + bi[row]; 5714 ap2 = ba + bi[row]; 5715 rmax2 = bimax[row]; 5716 nrow2 = bilen[row]; 5717 low2 = 0; 5718 high2 = nrow2; 5719 5720 for (j=0; j<n; j++) { 5721 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5722 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5723 if (in[j] >= cstart && in[j] < cend){ 5724 col = in[j] - cstart; 5725 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5726 } else if (in[j] < 0) continue; 5727 #if defined(PETSC_USE_DEBUG) 5728 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5729 #endif 5730 else { 5731 if (mat->was_assembled) { 5732 if (!aij->colmap) { 5733 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5734 } 5735 #if defined (PETSC_USE_CTABLE) 5736 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5737 col--; 5738 #else 5739 col = aij->colmap[in[j]] - 1; 5740 #endif 5741 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5742 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5743 col = in[j]; 5744 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5745 B = aij->B; 5746 b = (Mat_SeqAIJ*)B->data; 5747 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5748 rp2 = bj + bi[row]; 5749 ap2 = ba + bi[row]; 5750 rmax2 = bimax[row]; 5751 nrow2 = bilen[row]; 5752 low2 = 0; 5753 high2 = nrow2; 5754 bm = aij->B->rmap->n; 5755 ba = b->a; 5756 } 5757 } else col = in[j]; 5758 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5759 } 5760 } 5761 } else { 5762 if (!aij->donotstash) { 5763 if (roworiented) { 5764 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5765 } else { 5766 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5767 } 5768 } 5769 } 5770 }} 5771 PetscFunctionReturnVoid(); 5772 } 5773 EXTERN_C_END 5774 5775