#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity. A minimal creation sequence is sketched in the
   comment following these type descriptions.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
  automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
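/*
   Illustrative sketch (added for exposition, not part of the original implementation): a typical
   creation sequence for a MATAIJ matrix that behaves correctly on both one and many processes,
   as recommended above. Both preallocation routines are called; only the one matching the actual
   (Seq or MPI) type takes effect. The sizes M, N and the per-row nonzero estimates d_nz, o_nz
   are placeholders for application-specific values.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ... insert entries with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/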
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
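
    For MAT_INITIAL_MATRIX, process 0 ships each other process its row lengths, then its column
    indices, and finally its numerical values; each process then derives the diagonal/off-diagonal
    preallocation counts from its row lengths before inserting the values with MatSetValues().
    For MAT_REUSE_MATRIX only the numerical values are sent and copied directly into the existing
    local storage.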
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal 
part of the local 351 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined (PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++){ 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 373 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 374 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 375 #endif 376 PetscFunctionReturn(0); 377 } 378 379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 380 { \ 381 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 416 lastcol2 = col;\ 417 while (high2-low2 > 5) { \ 418 t = (low2+high2)/2; \ 419 if (rp2[t] > col) high2 = t; \ 420 else low2 = t; \ 421 } \ 422 for (_i=low2; _i<high2; _i++) { \ 423 if (rp2[_i] > col) break; \ 424 if (rp2[_i] == col) { \ 425 if (addv == ADD_VALUES) ap2[_i] += value; \ 426 else ap2[_i] = value; \ 427 goto b_noinsert; \ 428 } \ 429 } \ 430 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 433 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 434 N = nrow2++ - 1; b->nz++; high2++; \ 435 /* shift up all the later entries in this row */ \ 436 for (ii=N; ii>=_i; ii--) { \ 437 rp2[ii+1] = rp2[ii]; \ 438 ap2[ii+1] = ap2[ii]; \ 439 } \ 440 rp2[_i] = col; \ 441 ap2[_i] = value; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 
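
/*
   The two macros above form the hot inner loop of MatSetValues() for the diagonal (A) and
   off-diagonal (B) blocks. The sketch below is illustrative only (plain pseudo-C with local
   names, not compiled); it summarizes the logic each macro expands to for one (row,col,value)
   triple: narrow the search window using the previous column when entries arrive in ascending
   order, finish with a truncated binary search plus a short linear scan over the sorted column
   indices of the row, update in place if the column is already present, and otherwise grow the
   row if necessary and shift the later entries up so the row stays sorted.

      if (col <= lastcol) low = 0; else high = nrow;
      lastcol = col;
      while (high - low > 5) {
        t = (low + high)/2;
        if (cols[t] > col) high = t; else low = t;
      }
      for (i=low; i<high; i++) {
        if (cols[i] > col) break;
        if (cols[i] == col) {
          vals[i] = (addv == ADD_VALUES) ? vals[i] + value : value;
          goto done;
        }
      }
      if ((value == 0.0 && ignorezeroentries) || nonew == 1) goto done;
      if (nonew == -1) abort_with_new_nonzero_error(row,col);
      grow_row_if_full();
      for (j=nrow-1; j>=i; j--) { cols[j+1] = cols[j]; vals[j+1] = vals[j]; }
      cols[i] = col; vals[i] = value; nrow++;
      done: ;
*/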
446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend){ 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 
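          /* the colmap (global column number -> local column of B) is built lazily here, on the first
             insertion after assembly that references a column outside this process's diagonal block */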
} 540 #if defined (PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend){ 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined (PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col --; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else { 620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat 
mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 706 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 708 709 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 724 { 725 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 726 PetscErrorCode ierr; 727 728 PetscFunctionBegin; 729 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 730 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatZeroRows_MPIAIJ" 736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 737 { 738 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 739 PetscErrorCode ierr; 740 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 741 PetscInt i,*owners = A->rmap->range; 742 PetscInt *nprocs,j,idx,nsends,row; 743 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 744 PetscInt *rvalues,count,base,slen,*source; 745 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 746 MPI_Comm comm = ((PetscObject)A)->comm; 747 MPI_Request *send_waits,*recv_waits; 748 MPI_Status recv_status,*send_status; 749 const PetscScalar *xx; 750 PetscScalar *bb; 751 #if defined(PETSC_DEBUG) 752 PetscBool found = PETSC_FALSE; 753 #endif 754 755 PetscFunctionBegin; 756 /* first count number of contributors to each processor */ 757 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 758 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 759 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 760 j = 0; 761 for (i=0; i<N; i++) { 762 if (lastidx > (idx = rows[i])) j = 0; 763 lastidx = idx; 764 for (; j<size; j++) { 765 if (idx >= owners[j] && idx < owners[j+1]) { 766 nprocs[2*j]++; 767 nprocs[2*j+1] = 1; 768 owner[i] = j; 769 #if defined(PETSC_DEBUG) 770 found = PETSC_TRUE; 771 #endif 772 break; 773 } 774 } 775 #if defined(PETSC_DEBUG) 776 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 777 found = PETSC_FALSE; 778 #endif 779 } 780 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 781 782 if (A->nooffproczerorows) { 783 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 784 nrecvs = nsends; 785 nmax = N; 786 } 
else { 787 /* inform other processors of number of messages and max length*/ 788 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 789 } 790 791 /* post receives: */ 792 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 793 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 794 for (i=0; i<nrecvs; i++) { 795 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 796 } 797 798 /* do sends: 799 1) starts[i] gives the starting index in svalues for stuff going to 800 the ith processor 801 */ 802 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 804 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 805 starts[0] = 0; 806 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 807 for (i=0; i<N; i++) { 808 svalues[starts[owner[i]]++] = rows[i]; 809 } 810 811 starts[0] = 0; 812 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 813 count = 0; 814 for (i=0; i<size; i++) { 815 if (nprocs[2*i+1]) { 816 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 817 } 818 } 819 ierr = PetscFree(starts);CHKERRQ(ierr); 820 821 base = owners[rank]; 822 823 /* wait on receives */ 824 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 825 count = nrecvs; slen = 0; 826 while (count) { 827 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 828 /* unpack receives into our local space */ 829 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 830 source[imdex] = recv_status.MPI_SOURCE; 831 lens[imdex] = n; 832 slen += n; 833 count--; 834 } 835 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 836 837 /* move the data into the send scatter */ 838 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 839 count = 0; 840 for (i=0; i<nrecvs; i++) { 841 values = rvalues + i*nmax; 842 for (j=0; j<lens[i]; j++) { 843 lrows[count++] = values[j] - base; 844 } 845 } 846 ierr = PetscFree(rvalues);CHKERRQ(ierr); 847 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 848 ierr = PetscFree(owner);CHKERRQ(ierr); 849 ierr = PetscFree(nprocs);CHKERRQ(ierr); 850 851 /* fix right hand side if needed */ 852 if (x && b) { 853 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 854 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 855 for (i=0; i<slen; i++) { 856 bb[lrows[i]] = diag*xx[lrows[i]]; 857 } 858 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 859 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 860 } 861 /* 862 Zero the required rows. If the "diagonal block" of the matrix 863 is square and the user wishes to set the diagonal we use separate 864 code so that MatSetValues() is not called for each diagonal allocating 865 new memory, thus calling lots of mallocs and slowing things down. 
866 867 */ 868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 869 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 871 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 872 } else if (diag != 0.0) { 873 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 875 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 877 } 878 for (i = 0; i < slen; i++) { 879 row = lrows[i] + rstart; 880 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 881 } 882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 } else { 885 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(lrows);CHKERRQ(ierr); 888 889 /* wait on sends */ 890 if (nsends) { 891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 893 ierr = PetscFree(send_status);CHKERRQ(ierr); 894 } 895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 896 ierr = PetscFree(svalues);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 903 { 904 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 907 PetscInt i,*owners = A->rmap->range; 908 PetscInt *nprocs,j,idx,nsends; 909 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 910 PetscInt *rvalues,count,base,slen,*source; 911 PetscInt *lens,*lrows,*values,m; 912 MPI_Comm comm = ((PetscObject)A)->comm; 913 MPI_Request *send_waits,*recv_waits; 914 MPI_Status recv_status,*send_status; 915 const PetscScalar *xx; 916 PetscScalar *bb,*mask; 917 Vec xmask,lmask; 918 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 919 const PetscInt *aj, *ii,*ridx; 920 PetscScalar *aa; 921 #if defined(PETSC_DEBUG) 922 PetscBool found = PETSC_FALSE; 923 #endif 924 925 PetscFunctionBegin; 926 /* first count number of contributors to each processor */ 927 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 928 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 929 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 930 j = 0; 931 for (i=0; i<N; i++) { 932 if (lastidx > (idx = rows[i])) j = 0; 933 lastidx = idx; 934 for (; j<size; j++) { 935 if (idx >= owners[j] && idx < owners[j+1]) { 936 nprocs[2*j]++; 937 nprocs[2*j+1] = 1; 938 owner[i] = j; 939 #if defined(PETSC_DEBUG) 940 found = PETSC_TRUE; 941 #endif 942 break; 943 } 944 } 945 #if defined(PETSC_DEBUG) 946 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 947 found = PETSC_FALSE; 948 #endif 949 } 950 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 951 952 /* inform other processors of number of messages and max length*/ 953 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 954 955 /* post receives: */ 956 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 957 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 958 for (i=0; i<nrecvs; i++) { 959 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 960 } 961 962 /* do sends: 963 1) starts[i] gives the starting index in svalues for stuff going to 964 the ith processor 965 */ 966 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 967 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 968 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 969 starts[0] = 0; 970 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 971 for (i=0; i<N; i++) { 972 svalues[starts[owner[i]]++] = rows[i]; 973 } 974 975 starts[0] = 0; 976 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 977 count = 0; 978 for (i=0; i<size; i++) { 979 if (nprocs[2*i+1]) { 980 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 981 } 982 } 983 ierr = PetscFree(starts);CHKERRQ(ierr); 984 985 base = owners[rank]; 986 987 /* wait on receives */ 988 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 989 count = nrecvs; slen = 0; 990 while (count) { 991 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 992 /* unpack receives into our local space */ 993 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 994 source[imdex] = recv_status.MPI_SOURCE; 995 lens[imdex] = n; 996 slen += n; 997 count--; 998 } 999 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1000 1001 /* move the data into the send scatter */ 1002 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1003 count = 0; 1004 for (i=0; i<nrecvs; i++) { 1005 values = rvalues + i*nmax; 1006 for (j=0; j<lens[i]; j++) { 1007 lrows[count++] = values[j] - base; 1008 } 1009 } 1010 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1011 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1012 ierr = PetscFree(owner);CHKERRQ(ierr); 1013 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1014 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1015 1016 /* zero diagonal part of matrix */ 1017 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1018 1019 /* handle off diagonal part of matrix */ 1020 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1022 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1023 for (i=0; i<slen; i++) { 1024 bb[lrows[i]] = 1; 1025 } 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use){ 1046 m = 
aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++){ 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 1191 /* Easy test: symmetric diagonal block */ 1192 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1193 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1194 if (!*f) PetscFunctionReturn(0); 1195 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1197 if (size == 1) PetscFunctionReturn(0); 1198 1199 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1200 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1201 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1202 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1203 for (i=0; i<first; i++) notme[i] = i; 1204 for (i=last; i<M; i++) notme[i-last+first] = i; 1205 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1206 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1207 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1208 Aoff = Aoffs[0]; 1209 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1210 Boff = Boffs[0]; 1211 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1213 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1215 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1216 ierr = PetscFree(notme);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 EXTERN_C_END 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1224 { 1225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* do nondiagonal part */ 1230 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1231 /* send it on its way */ 1232 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 /* do local part */ 1234 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1235 /* receive remote parts */ 1236 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 /* 1241 This only works correctly for square matrices where the subblock A->A is the 1242 diagonal block 1243 */ 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1247 { 1248 PetscErrorCode ierr; 1249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1250 1251 PetscFunctionBegin; 1252 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1253 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1254 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatScale_MPIAIJ" 1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1261 { 1262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1267 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDestroy_MPIAIJ" 1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 #if defined(PETSC_USE_LOG) 1280 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1281 #endif 1282 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1283 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1284 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1285 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1286 #if defined (PETSC_USE_CTABLE) 1287 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
1288 #else 1289 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1292 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1293 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1294 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1295 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1316 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1319 int fd; 1320 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1321 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1322 PetscScalar *column_values; 1323 PetscInt message_count,flowcontrolcount; 1324 FILE *file; 1325 1326 PetscFunctionBegin; 1327 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1328 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1329 nz = A->nz + B->nz; 1330 if (!rank) { 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = mat->rmap->N; 1333 header[2] = mat->cmap->N; 1334 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1335 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1337 /* get largest number of rows any processor has */ 1338 rlen = mat->rmap->n; 1339 range = mat->rmap->range; 1340 for (i=1; i<size; i++) { 1341 rlen = PetscMax(rlen,range[i+1] - range[i]); 1342 } 1343 } else { 1344 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 rlen = mat->rmap->n; 1346 } 1347 1348 /* load up the local row counts */ 1349 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1350 for (i=0; i<mat->rmap->n; i++) { 1351 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1352 } 1353 1354 /* store the row lengths to the file */ 1355 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 for (i=1; i<size; i++) { 1359 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1360 rlen = range[i+1] - range[i]; 1361 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1362 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1363 } 1364 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1367 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1368 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1369 } 1370 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1371 1372 /* load up the local column indices */ 1373 nzmax = nz; /* )th processor needs space a largest processor needs */ 1374 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1375 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1376 cnt = 0; 1377 for (i=0; i<mat->rmap->n; i++) { 1378 for (j=B->i[i]; j<B->i[i+1]; j++) { 1379 if ( (col = garray[B->j[j]]) > cstart) break; 1380 column_indices[cnt++] = col; 1381 } 1382 for (k=A->i[i]; k<A->i[i+1]; k++) { 1383 column_indices[cnt++] = A->j[k] + cstart; 1384 } 1385 for (; j<B->i[i+1]; j++) { 1386 column_indices[cnt++] = garray[B->j[j]]; 1387 } 1388 } 1389 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1390 1391 /* store the column indices to the file */ 1392 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1393 if (!rank) { 1394 MPI_Status status; 1395 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1396 for (i=1; i<size; i++) { 1397 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1398 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1399 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1400 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1401 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1402 } 1403 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1406 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1407 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1408 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1409 } 1410 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1411 1412 /* load up the local column values */ 1413 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1414 cnt = 0; 1415 for (i=0; i<mat->rmap->n; i++) { 1416 for (j=B->i[i]; j<B->i[i+1]; j++) { 1417 if ( garray[B->j[j]] > cstart) break; 1418 column_values[cnt++] = B->a[j]; 1419 } 1420 for (k=A->i[i]; k<A->i[i+1]; k++) { 1421 column_values[cnt++] = A->a[k]; 1422 } 1423 for (; j<B->i[i+1]; j++) { 1424 column_values[cnt++] = B->a[j]; 1425 } 1426 } 1427 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1428 1429 /* store the column values to the file */ 1430 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1431 if (!rank) { 1432 MPI_Status status; 1433 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1434 for (i=1; i<size; i++) { 1435 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1436 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1437 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1438 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1439 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1440 } 1441 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1442 } else { 1443 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1444 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1445 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1446 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(column_values);CHKERRQ(ierr); 1449 1450 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1451 if (file) { 1452 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1453 } 1454 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1461 { 1462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1463 PetscErrorCode ierr; 1464 PetscMPIInt rank = aij->rank,size = aij->size; 1465 PetscBool isdraw,iascii,isbinary; 1466 PetscViewer sviewer; 1467 PetscViewerFormat format; 1468 1469 PetscFunctionBegin; 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1473 if (iascii) { 1474 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1475 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1476 MatInfo info; 1477 PetscBool inodes; 1478 1479 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1480 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1483 if (!inodes) { 1484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1485 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1488 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1489 } 1490 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1492 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1493 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D 
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
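/*
   Note with a hedged example (identifiers are placeholders): the ASCII and draw
   paths above gather the entire matrix onto process 0, so ASCII output is
   refused beyond 1024 rows unless the override option checked above is given.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Relevant run-time options (the second one is the option tested in the code above):
     -mat_view                  print the matrix after assembly
     -mat_ascii_output_large    allow ASCII output for more than 1024 rows
*/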
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
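/*
   Sketch of what the sweeps above compute (a reading aid, not a statement beyond
   this file): ghost values of xx are scattered into lvec, the off-process
   coupling is folded into the right-hand side, bb1 = bb - B*lvec, and a
   sequential SOR sweep is applied to the diagonal block A.  A hedged way to
   select this smoother from user code, with ksp, pc, A, b, x as placeholders:

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/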
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 MPI_Comm comm; 1725 PetscInt first,local_rowsize,local_colsize; 1726 const PetscInt *rows; 1727 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1732 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1733 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1734 /* collect the global row permutation and invert it */ 1735 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1736 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1737 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1738 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1739 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1740 /* get the local target indices */ 1741 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1742 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1743 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1744 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1745 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1746 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1747 /* the column permutation is so much easier; 1748 make a local version of 'colp' and invert it */ 1749 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1750 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1751 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1752 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1753 /* now we just get the submatrix */ 1754 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1755 /* clean up */ 1756 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1757 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1764 { 1765 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1766 Mat A = mat->A,B = 
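/*
   Hedged usage example for the routine that starts here (A is a placeholder for
   an assembled MPIAIJ matrix): MatGetInfo() adds the statistics of the diagonal
   and off-diagonal blocks and, for the global flavors, reduces them over the
   matrix communicator.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/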
mat->B; 1767 PetscErrorCode ierr; 1768 PetscReal isend[5],irecv[5]; 1769 1770 PetscFunctionBegin; 1771 info->block_size = 1.0; 1772 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1774 isend[3] = info->memory; isend[4] = info->mallocs; 1775 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1776 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1777 isend[3] += info->memory; isend[4] += info->mallocs; 1778 if (flag == MAT_LOCAL) { 1779 info->nz_used = isend[0]; 1780 info->nz_allocated = isend[1]; 1781 info->nz_unneeded = isend[2]; 1782 info->memory = isend[3]; 1783 info->mallocs = isend[4]; 1784 } else if (flag == MAT_GLOBAL_MAX) { 1785 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } else if (flag == MAT_GLOBAL_SUM) { 1792 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1793 info->nz_used = irecv[0]; 1794 info->nz_allocated = irecv[1]; 1795 info->nz_unneeded = irecv[2]; 1796 info->memory = irecv[3]; 1797 info->mallocs = irecv[4]; 1798 } 1799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1800 info->fill_ratio_needed = 0; 1801 info->factor_mallocs = 0; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSetOption_MPIAIJ" 1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1808 { 1809 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 switch (op) { 1814 case MAT_NEW_NONZERO_LOCATIONS: 1815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1816 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1817 case MAT_KEEP_NONZERO_PATTERN: 1818 case MAT_NEW_NONZERO_LOCATION_ERR: 1819 case MAT_USE_INODES: 1820 case MAT_IGNORE_ZERO_ENTRIES: 1821 MatCheckPreallocated(A,1); 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_ROW_ORIENTED: 1826 a->roworiented = flg; 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_NEW_DIAGONALS: 1831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1832 break; 1833 case MAT_IGNORE_OFF_PROC_ENTRIES: 1834 a->donotstash = flg; 1835 break; 1836 case MAT_SPD: 1837 A->spd_set = PETSC_TRUE; 1838 A->spd = flg; 1839 if (flg) { 1840 A->symmetric = PETSC_TRUE; 1841 A->structurally_symmetric = PETSC_TRUE; 1842 A->symmetric_set = PETSC_TRUE; 1843 A->structurally_symmetric_set = PETSC_TRUE; 1844 } 1845 break; 1846 case MAT_SYMMETRIC: 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_STRUCTURALLY_SYMMETRIC: 1850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1851 break; 1852 case MAT_HERMITIAN: 1853 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1854 break; 1855 case MAT_SYMMETRY_ETERNAL: 1856 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1857 break; 1858 default: 1859 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatGetRow_MPIAIJ" 1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1867 { 1868 Mat_MPIAIJ *mat = 
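/*
   The routine that starts here returns a locally owned row with the diagonal
   and off-diagonal blocks merged into increasing global column order (B columns
   below cstart, then the A block, then the remaining B columns).  A hedged
   traversal sketch, with A as a placeholder for an assembled MPIAIJ matrix:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // cols[] holds global column indices, vals[] the matching entries
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/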
(Mat_MPIAIJ*)matin->data; 1869 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1870 PetscErrorCode ierr; 1871 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1872 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1873 PetscInt *cmap,*idx_p; 1874 1875 PetscFunctionBegin; 1876 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1877 mat->getrowactive = PETSC_TRUE; 1878 1879 if (!mat->rowvalues && (idx || v)) { 1880 /* 1881 allocate enough space to hold information from the longest row. 1882 */ 1883 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1884 PetscInt max = 1,tmp; 1885 for (i=0; i<matin->rmap->n; i++) { 1886 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1887 if (max < tmp) { max = tmp; } 1888 } 1889 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1890 } 1891 1892 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1893 lrow = row - rstart; 1894 1895 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1896 if (!v) {pvA = 0; pvB = 0;} 1897 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1898 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1899 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1900 nztot = nzA + nzB; 1901 1902 cmap = mat->garray; 1903 if (v || idx) { 1904 if (nztot) { 1905 /* Sort by increasing column numbers, assuming A and B already sorted */ 1906 PetscInt imark = -1; 1907 if (v) { 1908 *v = v_p = mat->rowvalues; 1909 for (i=0; i<nzB; i++) { 1910 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1911 else break; 1912 } 1913 imark = i; 1914 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1915 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1916 } 1917 if (idx) { 1918 *idx = idx_p = mat->rowindices; 1919 if (imark > -1) { 1920 for (i=0; i<imark; i++) { 1921 idx_p[i] = cmap[cworkB[i]]; 1922 } 1923 } else { 1924 for (i=0; i<nzB; i++) { 1925 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1926 else break; 1927 } 1928 imark = i; 1929 } 1930 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1931 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1932 } 1933 } else { 1934 if (idx) *idx = 0; 1935 if (v) *v = 0; 1936 } 1937 } 1938 *nz = nztot; 1939 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1940 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1947 { 1948 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1949 1950 PetscFunctionBegin; 1951 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1952 aij->getrowactive = PETSC_FALSE; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatNorm_MPIAIJ" 1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1959 { 1960 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1961 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1962 PetscErrorCode ierr; 1963 PetscInt i,j,cstart = mat->cmap->rstart; 1964 PetscReal sum = 0.0; 1965 MatScalar *v; 1966 1967 PetscFunctionBegin; 1968 if (aij->size == 1) { 1969 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1970 } 
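/*
   In the parallel branch that follows, the Frobenius norm sums |a_ij|^2 over
   both blocks and reduces with MPI_SUM before the square root, the 1-norm
   reduces per-column sums, and the infinity norm reduces per-row sums with
   MPI_MAX; the matrix 2-norm is not supported.  Hedged usage (A a placeholder):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/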
else { 1971 if (type == NORM_FROBENIUS) { 1972 v = amat->a; 1973 for (i=0; i<amat->nz; i++) { 1974 #if defined(PETSC_USE_COMPLEX) 1975 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1976 #else 1977 sum += (*v)*(*v); v++; 1978 #endif 1979 } 1980 v = bmat->a; 1981 for (i=0; i<bmat->nz; i++) { 1982 #if defined(PETSC_USE_COMPLEX) 1983 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1984 #else 1985 sum += (*v)*(*v); v++; 1986 #endif 1987 } 1988 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1989 *norm = PetscSqrtReal(*norm); 1990 } else if (type == NORM_1) { /* max column norm */ 1991 PetscReal *tmp,*tmp2; 1992 PetscInt *jj,*garray = aij->garray; 1993 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1994 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1995 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1996 *norm = 0.0; 1997 v = amat->a; jj = amat->j; 1998 for (j=0; j<amat->nz; j++) { 1999 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a; jj = bmat->j; 2002 for (j=0; j<bmat->nz; j++) { 2003 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2004 } 2005 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2006 for (j=0; j<mat->cmap->N; j++) { 2007 if (tmp2[j] > *norm) *norm = tmp2[j]; 2008 } 2009 ierr = PetscFree(tmp);CHKERRQ(ierr); 2010 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2011 } else if (type == NORM_INFINITY) { /* max row norm */ 2012 PetscReal ntemp = 0.0; 2013 for (j=0; j<aij->A->rmap->n; j++) { 2014 v = amat->a + amat->i[j]; 2015 sum = 0.0; 2016 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2017 sum += PetscAbsScalar(*v); v++; 2018 } 2019 v = bmat->a + bmat->i[j]; 2020 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2021 sum += PetscAbsScalar(*v); v++; 2022 } 2023 if (sum > ntemp) ntemp = sum; 2024 } 2025 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2026 } else { 2027 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2028 } 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatTranspose_MPIAIJ" 2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2036 { 2037 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2038 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2039 PetscErrorCode ierr; 2040 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2041 PetscInt cstart=A->cmap->rstart,ncol; 2042 Mat B; 2043 MatScalar *array; 2044 2045 PetscFunctionBegin; 2046 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2047 2048 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2049 ai = Aloc->i; aj = Aloc->j; 2050 bi = Bloc->i; bj = Bloc->j; 2051 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2052 PetscInt *d_nnz,*g_nnz,*o_nnz; 2053 PetscSFNode *oloc; 2054 PetscSF sf; 2055 2056 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2057 /* compute d_nnz for preallocation */ 2058 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2059 for (i=0; i<ai[ma]; i++) { 2060 d_nnz[aj[i]] ++; 2061 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2062 } 2063 /* compute local off-diagonal contributions */ 2064 for (i=0; i<bi[ma]; i++) 
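/*
   Preallocation sketch for the transpose being built below: every local column
   of A becomes a local row of the transpose, the diagonal block is counted
   directly into d_nnz, and the off-diagonal counts are reduced onto the owning
   process with a PetscSF to form o_nnz.  Hedged user-level example (At is a
   placeholder):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   // same pattern, new values
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/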
g_nnz[bj[i]]++; 2065 /* map those to global */ 2066 for (i=0; i<nb; i++) { 2067 PetscInt gcol = a->garray[i],colowner; 2068 ierr = PetscLayoutFindOwner(A->cmap,a->garray[i],&colowner);CHKERRQ(ierr); 2069 oloc[i].rank = colowner; 2070 oloc[i].index = gcol - A->cmap->range[colowner]; 2071 } 2072 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2073 ierr = PetscSFSetGraph(sf,na,nb,PETSC_NULL,PETSC_USE_POINTER,oloc,PETSC_USE_POINTER);CHKERRQ(ierr); 2074 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2075 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2076 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2077 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2078 2079 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2080 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2081 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2082 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2083 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2084 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2085 } else { 2086 B = *matout; 2087 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2088 for (i=0; i<ai[ma]; i++){ 2089 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2090 } 2091 } 2092 2093 /* copy over the A part */ 2094 array = Aloc->a; 2095 row = A->rmap->rstart; 2096 for (i=0; i<ma; i++) { 2097 ncol = ai[i+1]-ai[i]; 2098 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2099 row++; array += ncol; aj += ncol; 2100 } 2101 aj = Aloc->j; 2102 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2103 2104 /* copy over the B part */ 2105 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2106 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2107 array = Bloc->a; 2108 row = A->rmap->rstart; 2109 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2110 cols_tmp = cols; 2111 for (i=0; i<mb; i++) { 2112 ncol = bi[i+1]-bi[i]; 2113 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2114 row++; array += ncol; cols_tmp += ncol; 2115 } 2116 ierr = PetscFree(cols);CHKERRQ(ierr); 2117 2118 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2120 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2121 *matout = B; 2122 } else { 2123 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2130 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2131 { 2132 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2133 Mat a = aij->A,b = aij->B; 2134 PetscErrorCode ierr; 2135 PetscInt s1,s2,s3; 2136 2137 PetscFunctionBegin; 2138 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2139 if (rr) { 2140 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2141 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2142 /* Overlap communication with computation. 
*/ 2143 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2144 } 2145 if (ll) { 2146 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2147 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2148 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2149 } 2150 /* scale the diagonal block */ 2151 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2152 2153 if (rr) { 2154 /* Do a scatter end and then right scale the off-diagonal block */ 2155 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2156 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2157 } 2158 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2164 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2165 { 2166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatEqual_MPIAIJ" 2176 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2177 { 2178 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2179 Mat a,b,c,d; 2180 PetscBool flg; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 a = matA->A; b = matA->B; 2185 c = matB->A; d = matB->B; 2186 2187 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2188 if (flg) { 2189 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2190 } 2191 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatCopy_MPIAIJ" 2197 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2198 { 2199 PetscErrorCode ierr; 2200 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2201 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2202 2203 PetscFunctionBegin; 2204 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2205 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2206 /* because of the column compression in the off-processor part of the matrix a->B, 2207 the number of columns in a->B and b->B may be different, hence we cannot call 2208 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2209 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2210 then copying the submatrices */ 2211 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2212 } else { 2213 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2214 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatSetUp_MPIAIJ" 2221 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2222 { 2223 PetscErrorCode ierr; 2224 2225 PetscFunctionBegin; 2226 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #undef __FUNCT__ 2231 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2232 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2233 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2234 { 2235 PetscInt i,m=Y->rmap->N; 2236 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2237 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2238 const PetscInt *xi = x->i,*yi = y->i; 2239 2240 PetscFunctionBegin; 2241 /* Set the number of nonzeros in the new matrix */ 2242 for (i=0; i<m; i++) { 2243 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2244 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2245 nnz[i] = 0; 2246 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2247 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2248 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2249 nnz[i]++; 2250 } 2251 for (; k<nzy; k++) nnz[i]++; 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatAXPY_MPIAIJ" 2258 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2259 { 2260 PetscErrorCode ierr; 2261 PetscInt i; 2262 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2263 PetscBLASInt bnz,one=1; 2264 Mat_SeqAIJ *x,*y; 2265 2266 PetscFunctionBegin; 2267 if (str == SAME_NONZERO_PATTERN) { 2268 PetscScalar alpha = a; 2269 x = (Mat_SeqAIJ *)xx->A->data; 2270 y = (Mat_SeqAIJ *)yy->A->data; 2271 bnz = PetscBLASIntCast(x->nz); 2272 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2273 x = (Mat_SeqAIJ *)xx->B->data; 2274 y = (Mat_SeqAIJ *)yy->B->data; 2275 bnz = PetscBLASIntCast(x->nz); 2276 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2277 } else if (str == SUBSET_NONZERO_PATTERN) { 2278 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2279 2280 x = (Mat_SeqAIJ *)xx->B->data; 2281 y = (Mat_SeqAIJ *)yy->B->data; 2282 if (y->xtoy && y->XtoY != xx->B) { 2283 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2284 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2285 } 2286 if (!y->xtoy) { /* get xtoy */ 2287 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2288 y->XtoY = xx->B; 2289 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2290 } 2291 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2292 } else { 2293 Mat B; 2294 PetscInt *nnz_d,*nnz_o; 2295 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2296 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2297 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2298 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2299 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2300 ierr = 
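/*
   The three branches of MatAXPY() here: SAME_NONZERO_PATTERN does a BLAS axpy on
   the values of both blocks, SUBSET_NONZERO_PATTERN maps X's entries into Y
   through an xtoy index map, and the general case (being set up here) builds a
   fresh matrix preallocated for the union pattern and replaces Y's header.
   Hedged usage, Y and X placeholders for assembled MPIAIJ matrices:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   // Y = Y + 2*X
*/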
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2301 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2302 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2303 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2304 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2305 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2306 ierr = MatHeaderReplace(Y,B); 2307 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2308 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2314 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatConjugate_MPIAIJ" 2317 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2318 { 2319 #if defined(PETSC_USE_COMPLEX) 2320 PetscErrorCode ierr; 2321 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2322 2323 PetscFunctionBegin; 2324 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2325 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2326 #else 2327 PetscFunctionBegin; 2328 #endif 2329 PetscFunctionReturn(0); 2330 } 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatRealPart_MPIAIJ" 2334 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2335 { 2336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2337 PetscErrorCode ierr; 2338 2339 PetscFunctionBegin; 2340 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2341 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2347 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2348 { 2349 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2350 PetscErrorCode ierr; 2351 2352 PetscFunctionBegin; 2353 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2354 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #ifdef PETSC_HAVE_PBGL 2359 2360 #include <boost/parallel/mpi/bsp_process_group.hpp> 2361 #include <boost/graph/distributed/ilu_default_graph.hpp> 2362 #include <boost/graph/distributed/ilu_0_block.hpp> 2363 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2364 #include <boost/graph/distributed/petsc/interface.hpp> 2365 #include <boost/multi_array.hpp> 2366 #include <boost/parallel/distributed_property_map->hpp> 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2370 /* 2371 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2372 */ 2373 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2374 { 2375 namespace petsc = boost::distributed::petsc; 2376 2377 namespace graph_dist = boost::graph::distributed; 2378 using boost::graph::distributed::ilu_default::process_group_type; 2379 using boost::graph::ilu_permuted; 2380 2381 PetscBool row_identity, col_identity; 2382 PetscContainer c; 2383 PetscInt m, n, M, N; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2388 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2389 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2390 if (!row_identity || !col_identity) { 2391 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2392 } 2393 2394 process_group_type pg; 2395 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2396 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2397 lgraph_type& level_graph = *lgraph_p; 2398 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2399 2400 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2401 ilu_permuted(level_graph); 2402 2403 /* put together the new matrix */ 2404 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2405 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2406 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2407 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2408 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2409 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2410 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2412 2413 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2414 ierr = PetscContainerSetPointer(c, lgraph_p); 2415 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2416 ierr = PetscContainerDestroy(&c); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2422 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2423 { 2424 PetscFunctionBegin; 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatSolve_MPIAIJ" 2430 /* 2431 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2432 */ 2433 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2434 { 2435 namespace graph_dist = boost::graph::distributed; 2436 2437 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2438 lgraph_type* lgraph_p; 2439 PetscContainer c; 2440 PetscErrorCode ierr; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2444 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2445 ierr = VecCopy(b, x);CHKERRQ(ierr); 2446 2447 PetscScalar* array_x; 2448 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2449 PetscInt sx; 2450 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2451 2452 PetscScalar* array_b; 2453 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2454 PetscInt sb; 2455 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2456 2457 lgraph_type& level_graph = *lgraph_p; 2458 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2459 2460 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2461 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2462 ref_x(array_x, boost::extents[num_vertices(graph)]); 2463 2464 typedef boost::iterator_property_map<array_ref_type::iterator, 2465 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2466 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2467 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2468 2469 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2470 2471 PetscFunctionReturn(0); 2472 } 2473 #endif 2474 2475 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2476 PetscInt nzlocal,nsends,nrecvs; 2477 PetscMPIInt *send_rank,*recv_rank; 2478 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2479 PetscScalar *sbuf_a,**rbuf_a; 2480 PetscErrorCode (*Destroy)(Mat); 2481 } Mat_Redundant; 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2485 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2486 { 2487 PetscErrorCode ierr; 2488 Mat_Redundant 
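/*
   The Mat_Redundant bookkeeping defined above is attached to the redundant
   matrix further below with the usual PetscContainer idiom.  A hedged, generic
   sketch of that idiom with a hypothetical MyCtx struct and destroy callback
   (A is a placeholder object):

     typedef struct { PetscInt n; } MyCtx;
     MyCtx          *ctx;
     PetscContainer container;
     ierr = PetscNew(MyCtx,&ctx);CHKERRQ(ierr);
     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
     ierr = PetscContainerSetUserDestroy(container,MyCtxDestroy);CHKERRQ(ierr);  // hypothetical callback
     ierr = PetscObjectCompose((PetscObject)A,"MyCtx",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);   // the object keeps its own reference
*/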
*redund=(Mat_Redundant*)ptr; 2489 PetscInt i; 2490 2491 PetscFunctionBegin; 2492 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2493 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2494 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2495 for (i=0; i<redund->nrecvs; i++){ 2496 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2497 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2498 } 2499 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2500 ierr = PetscFree(redund);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatDestroy_MatRedundant" 2506 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2507 { 2508 PetscErrorCode ierr; 2509 PetscContainer container; 2510 Mat_Redundant *redund=PETSC_NULL; 2511 2512 PetscFunctionBegin; 2513 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2514 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2515 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2516 A->ops->destroy = redund->Destroy; 2517 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2518 if (A->ops->destroy) { 2519 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2526 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2527 { 2528 PetscMPIInt rank,size; 2529 MPI_Comm comm=((PetscObject)mat)->comm; 2530 PetscErrorCode ierr; 2531 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2532 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2533 PetscInt *rowrange=mat->rmap->range; 2534 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2535 Mat A=aij->A,B=aij->B,C=*matredundant; 2536 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2537 PetscScalar *sbuf_a; 2538 PetscInt nzlocal=a->nz+b->nz; 2539 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2540 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2541 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2542 MatScalar *aworkA,*aworkB; 2543 PetscScalar *vals; 2544 PetscMPIInt tag1,tag2,tag3,imdex; 2545 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2546 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2547 MPI_Status recv_status,*send_status; 2548 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2549 PetscInt **rbuf_j=PETSC_NULL; 2550 PetscScalar **rbuf_a=PETSC_NULL; 2551 Mat_Redundant *redund=PETSC_NULL; 2552 PetscContainer container; 2553 2554 PetscFunctionBegin; 2555 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557 2558 if (reuse == MAT_REUSE_MATRIX) { 2559 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2560 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2561 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2562 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2563 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2564 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2565 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2566 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2567 2568 nsends = redund->nsends; 2569 nrecvs = redund->nrecvs; 2570 send_rank = redund->send_rank; 2571 recv_rank = redund->recv_rank; 2572 sbuf_nz = redund->sbuf_nz; 2573 rbuf_nz = redund->rbuf_nz; 2574 sbuf_j = redund->sbuf_j; 2575 sbuf_a = redund->sbuf_a; 2576 rbuf_j = redund->rbuf_j; 2577 rbuf_a = redund->rbuf_a; 2578 } 2579 2580 if (reuse == MAT_INITIAL_MATRIX){ 2581 PetscMPIInt subrank,subsize; 2582 PetscInt nleftover,np_subcomm; 2583 /* get the destination processors' id send_rank, nsends and nrecvs */ 2584 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2586 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2587 np_subcomm = size/nsubcomm; 2588 nleftover = size - nsubcomm*np_subcomm; 2589 nsends = 0; nrecvs = 0; 2590 for (i=0; i<size; i++){ /* i=rank*/ 2591 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2592 send_rank[nsends] = i; nsends++; 2593 recv_rank[nrecvs++] = i; 2594 } 2595 } 2596 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2597 i = size-nleftover-1; 2598 j = 0; 2599 while (j < nsubcomm - nleftover){ 2600 send_rank[nsends++] = i; 2601 i--; j++; 2602 } 2603 } 2604 2605 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2606 for (i=0; i<nleftover; i++){ 2607 recv_rank[nrecvs++] = size-nleftover+i; 2608 } 2609 } 2610 2611 /* allocate sbuf_j, sbuf_a */ 2612 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2613 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2614 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2615 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2616 2617 /* copy mat's local entries into the buffers */ 2618 if (reuse == MAT_INITIAL_MATRIX){ 2619 rownz_max = 0; 2620 rptr = sbuf_j; 2621 cols = sbuf_j + rend-rstart + 1; 2622 vals = sbuf_a; 2623 rptr[0] = 0; 2624 for (i=0; i<rend-rstart; i++){ 2625 row = i + rstart; 2626 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2627 ncols = nzA + nzB; 2628 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2629 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2630 /* load the column indices for this row into cols */ 2631 lwrite = 0; 2632 for (l=0; l<nzB; l++) { 2633 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2634 vals[lwrite] = aworkB[l]; 2635 cols[lwrite++] = ctmp; 2636 } 2637 } 2638 for (l=0; l<nzA; l++){ 2639 vals[lwrite] = aworkA[l]; 2640 cols[lwrite++] = cstart + cworkA[l]; 2641 } 2642 for (l=0; l<nzB; l++) { 2643 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2644 vals[lwrite] = aworkB[l]; 2645 cols[lwrite++] = ctmp; 2646 } 2647 } 2648 vals += ncols; 2649 cols += ncols; 2650 rptr[i+1] = rptr[i] + ncols; 2651 if (rownz_max < ncols) rownz_max = ncols; 2652 } 2653 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2654 } else { /* only copy matrix values into sbuf_a */ 2655 rptr = sbuf_j; 2656 vals = sbuf_a; 2657 rptr[0] = 0; 2658 for (i=0; i<rend-rstart; i++){ 2659 row = i + 
rstart; 2660 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2661 ncols = nzA + nzB; 2662 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2663 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2664 lwrite = 0; 2665 for (l=0; l<nzB; l++) { 2666 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2667 } 2668 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2669 for (l=0; l<nzB; l++) { 2670 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2671 } 2672 vals += ncols; 2673 rptr[i+1] = rptr[i] + ncols; 2674 } 2675 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2676 2677 /* send nzlocal to others, and recv other's nzlocal */ 2678 /*--------------------------------------------------*/ 2679 if (reuse == MAT_INITIAL_MATRIX){ 2680 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2681 s_waits2 = s_waits3 + nsends; 2682 s_waits1 = s_waits2 + nsends; 2683 r_waits1 = s_waits1 + nsends; 2684 r_waits2 = r_waits1 + nrecvs; 2685 r_waits3 = r_waits2 + nrecvs; 2686 } else { 2687 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2688 r_waits3 = s_waits3 + nsends; 2689 } 2690 2691 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2692 if (reuse == MAT_INITIAL_MATRIX){ 2693 /* get new tags to keep the communication clean */ 2694 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2695 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2696 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2697 2698 /* post receives of other's nzlocal */ 2699 for (i=0; i<nrecvs; i++){ 2700 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2701 } 2702 /* send nzlocal to others */ 2703 for (i=0; i<nsends; i++){ 2704 sbuf_nz[i] = nzlocal; 2705 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2706 } 2707 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2708 count = nrecvs; 2709 while (count) { 2710 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2711 recv_rank[imdex] = recv_status.MPI_SOURCE; 2712 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2713 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2714 2715 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2716 rbuf_nz[imdex] += i + 2; 2717 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2718 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2719 count--; 2720 } 2721 /* wait on sends of nzlocal */ 2722 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2723 /* send mat->i,j to others, and recv from other's */ 2724 /*------------------------------------------------*/ 2725 for (i=0; i<nsends; i++){ 2726 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2727 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2728 } 2729 /* wait on receives of mat->i,j */ 2730 /*------------------------------*/ 2731 count = nrecvs; 2732 while (count) { 2733 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2734 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2735 count--; 2736 } 2737 /* wait on sends of mat->i,j */ 2738 /*---------------------------*/ 2739 if (nsends) { 2740 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2741 } 2742 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2743 2744 /* post receives, send and receive mat->a */ 2745 /*----------------------------------------*/ 2746 for (imdex=0; imdex<nrecvs; imdex++) { 2747 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2748 } 2749 for (i=0; i<nsends; i++){ 2750 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2751 } 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2755 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2756 count--; 2757 } 2758 if (nsends) { 2759 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2760 } 2761 2762 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2763 2764 /* create redundant matrix */ 2765 /*-------------------------*/ 2766 if (reuse == MAT_INITIAL_MATRIX){ 2767 /* compute rownz_max for preallocation */ 2768 for (imdex=0; imdex<nrecvs; imdex++){ 2769 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2770 rptr = rbuf_j[imdex]; 2771 for (i=0; i<j; i++){ 2772 ncols = rptr[i+1] - rptr[i]; 2773 if (rownz_max < ncols) rownz_max = ncols; 2774 } 2775 } 2776 2777 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2778 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2779 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2780 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2781 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2782 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2783 } else { 2784 C = *matredundant; 2785 } 2786 2787 /* insert local matrix entries */ 2788 rptr = sbuf_j; 2789 cols = sbuf_j + rend-rstart + 1; 2790 vals = sbuf_a; 2791 for (i=0; i<rend-rstart; i++){ 2792 row = i + rstart; 2793 ncols = rptr[i+1] - rptr[i]; 2794 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2795 vals += ncols; 2796 cols += ncols; 2797 } 2798 /* insert received matrix entries */ 2799 for (imdex=0; imdex<nrecvs; imdex++){ 2800 rstart = rowrange[recv_rank[imdex]]; 2801 rend = rowrange[recv_rank[imdex]+1]; 2802 rptr = rbuf_j[imdex]; 2803 cols = rbuf_j[imdex] + rend-rstart + 1; 2804 vals = rbuf_a[imdex]; 2805 for (i=0; i<rend-rstart; i++){ 2806 row = i + rstart; 2807 ncols = rptr[i+1] - rptr[i]; 2808 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2809 vals += ncols; 2810 cols += ncols; 2811 } 2812 } 2813 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2816 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2817 if (reuse == MAT_INITIAL_MATRIX) { 2818 PetscContainer container; 2819 *matredundant = C; 2820 /* create a supporting struct and attach it to C for reuse */ 2821 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2822 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
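/*
   Hedged usage note: this routine is normally driven by PCREDUNDANT, but it can
   be called directly once a subcommunicator and the local size on it are known;
   the MAT_REUSE_MATRIX path reuses the communication pattern cached in the
   container composed here.  The arguments mirror this routine's signature and
   C is a placeholder for the output matrix.

     Mat C;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/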
2823 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2824 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2825 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2826 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2827 2828 redund->nzlocal = nzlocal; 2829 redund->nsends = nsends; 2830 redund->nrecvs = nrecvs; 2831 redund->send_rank = send_rank; 2832 redund->recv_rank = recv_rank; 2833 redund->sbuf_nz = sbuf_nz; 2834 redund->rbuf_nz = rbuf_nz; 2835 redund->sbuf_j = sbuf_j; 2836 redund->sbuf_a = sbuf_a; 2837 redund->rbuf_j = rbuf_j; 2838 redund->rbuf_a = rbuf_a; 2839 2840 redund->Destroy = C->ops->destroy; 2841 C->ops->destroy = MatDestroy_MatRedundant; 2842 } 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2848 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2849 { 2850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 PetscInt i,*idxb = 0; 2853 PetscScalar *va,*vb; 2854 Vec vtmp; 2855 2856 PetscFunctionBegin; 2857 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2858 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2859 if (idx) { 2860 for (i=0; i<A->rmap->n; i++) { 2861 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2862 } 2863 } 2864 2865 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2866 if (idx) { 2867 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2868 } 2869 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2870 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2871 2872 for (i=0; i<A->rmap->n; i++){ 2873 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2874 va[i] = vb[i]; 2875 if (idx) idx[i] = a->garray[idxb[i]]; 2876 } 2877 } 2878 2879 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2880 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2881 ierr = PetscFree(idxb);CHKERRQ(ierr); 2882 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2888 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2889 { 2890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2891 PetscErrorCode ierr; 2892 PetscInt i,*idxb = 0; 2893 PetscScalar *va,*vb; 2894 Vec vtmp; 2895 2896 PetscFunctionBegin; 2897 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2898 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2899 if (idx) { 2900 for (i=0; i<A->cmap->n; i++) { 2901 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2902 } 2903 } 2904 2905 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2906 if (idx) { 2907 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2908 } 2909 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2910 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2911 2912 for (i=0; i<A->rmap->n; i++){ 2913 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2914 va[i] = vb[i]; 2915 if (idx) idx[i] = a->garray[idxb[i]]; 2916 } 2917 } 2918 2919 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2920 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2921 ierr = PetscFree(idxb);CHKERRQ(ierr); 2922 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2928 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2929 { 2930 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2931 PetscInt n = A->rmap->n; 2932 PetscInt cstart = 
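/*
   The row max/min routines above pick, entry by entry, the winner between the
   diagonal and off-diagonal blocks and return global column indices (diagonal
   indices shifted by cstart, off-diagonal ones mapped through garray).  Hedged
   usage sketch; nlocal stands for the local row count (e.g. from
   MatGetLocalSize()) and A for an assembled MPIAIJ matrix:

     Vec      rmax;
     PetscInt *loc;
     ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);      // one entry per local row
     ierr = PetscMalloc(nlocal*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);         // loc[i] = column of the largest |entry| in row i
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/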
A->cmap->rstart; 2933 PetscInt *cmap = mat->garray; 2934 PetscInt *diagIdx, *offdiagIdx; 2935 Vec diagV, offdiagV; 2936 PetscScalar *a, *diagA, *offdiagA; 2937 PetscInt r; 2938 PetscErrorCode ierr; 2939 2940 PetscFunctionBegin; 2941 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2942 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2943 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2944 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2945 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2946 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2947 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2948 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2949 for (r = 0; r < n; ++r) { 2950 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2951 a[r] = diagA[r]; 2952 idx[r] = cstart + diagIdx[r]; 2953 } else { 2954 a[r] = offdiagA[r]; 2955 idx[r] = cmap[offdiagIdx[r]]; 2956 } 2957 } 2958 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2959 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2961 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2962 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2963 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2969 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2970 { 2971 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2972 PetscInt n = A->rmap->n; 2973 PetscInt cstart = A->cmap->rstart; 2974 PetscInt *cmap = mat->garray; 2975 PetscInt *diagIdx, *offdiagIdx; 2976 Vec diagV, offdiagV; 2977 PetscScalar *a, *diagA, *offdiagA; 2978 PetscInt r; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2983 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2984 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2985 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2986 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2988 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2989 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2990 for (r = 0; r < n; ++r) { 2991 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2992 a[r] = diagA[r]; 2993 idx[r] = cstart + diagIdx[r]; 2994 } else { 2995 a[r] = offdiagA[r]; 2996 idx[r] = cmap[offdiagIdx[r]]; 2997 } 2998 } 2999 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3000 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3001 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3002 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3003 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3004 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3010 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3011 { 3012 PetscErrorCode ierr; 3013 Mat *dummy; 3014 3015 PetscFunctionBegin; 3016 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3017 *newmat = *dummy; 3018 ierr = PetscFree(dummy);CHKERRQ(ierr); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3023 3024 #undef __FUNCT__ 
3025 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3026 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3027 { 3028 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3029 PetscErrorCode ierr; 3030 3031 PetscFunctionBegin; 3032 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3038 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3039 { 3040 PetscErrorCode ierr; 3041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3042 3043 PetscFunctionBegin; 3044 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3045 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3046 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 /* -------------------------------------------------------------------*/ 3052 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3053 MatGetRow_MPIAIJ, 3054 MatRestoreRow_MPIAIJ, 3055 MatMult_MPIAIJ, 3056 /* 4*/ MatMultAdd_MPIAIJ, 3057 MatMultTranspose_MPIAIJ, 3058 MatMultTransposeAdd_MPIAIJ, 3059 #ifdef PETSC_HAVE_PBGL 3060 MatSolve_MPIAIJ, 3061 #else 3062 0, 3063 #endif 3064 0, 3065 0, 3066 /*10*/ 0, 3067 0, 3068 0, 3069 MatSOR_MPIAIJ, 3070 MatTranspose_MPIAIJ, 3071 /*15*/ MatGetInfo_MPIAIJ, 3072 MatEqual_MPIAIJ, 3073 MatGetDiagonal_MPIAIJ, 3074 MatDiagonalScale_MPIAIJ, 3075 MatNorm_MPIAIJ, 3076 /*20*/ MatAssemblyBegin_MPIAIJ, 3077 MatAssemblyEnd_MPIAIJ, 3078 MatSetOption_MPIAIJ, 3079 MatZeroEntries_MPIAIJ, 3080 /*24*/ MatZeroRows_MPIAIJ, 3081 0, 3082 #ifdef PETSC_HAVE_PBGL 3083 0, 3084 #else 3085 0, 3086 #endif 3087 0, 3088 0, 3089 /*29*/ MatSetUp_MPIAIJ, 3090 #ifdef PETSC_HAVE_PBGL 3091 0, 3092 #else 3093 0, 3094 #endif 3095 0, 3096 0, 3097 0, 3098 /*34*/ MatDuplicate_MPIAIJ, 3099 0, 3100 0, 3101 0, 3102 0, 3103 /*39*/ MatAXPY_MPIAIJ, 3104 MatGetSubMatrices_MPIAIJ, 3105 MatIncreaseOverlap_MPIAIJ, 3106 MatGetValues_MPIAIJ, 3107 MatCopy_MPIAIJ, 3108 /*44*/ MatGetRowMax_MPIAIJ, 3109 MatScale_MPIAIJ, 3110 0, 3111 0, 3112 MatZeroRowsColumns_MPIAIJ, 3113 /*49*/ MatSetRandom_MPIAIJ, 3114 0, 3115 0, 3116 0, 3117 0, 3118 /*54*/ MatFDColoringCreate_MPIAIJ, 3119 0, 3120 MatSetUnfactored_MPIAIJ, 3121 0, /* MatPermute_MPIAIJ, impl currently broken */ 3122 0, 3123 /*59*/ MatGetSubMatrix_MPIAIJ, 3124 MatDestroy_MPIAIJ, 3125 MatView_MPIAIJ, 3126 0, 3127 0, 3128 /*64*/ 0, 3129 0, 3130 0, 3131 0, 3132 0, 3133 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3134 MatGetRowMinAbs_MPIAIJ, 3135 0, 3136 MatSetColoring_MPIAIJ, 3137 #if defined(PETSC_HAVE_ADIC) 3138 MatSetValuesAdic_MPIAIJ, 3139 #else 3140 0, 3141 #endif 3142 MatSetValuesAdifor_MPIAIJ, 3143 /*75*/ MatFDColoringApply_AIJ, 3144 0, 3145 0, 3146 0, 3147 MatFindZeroDiagonals_MPIAIJ, 3148 /*80*/ 0, 3149 0, 3150 0, 3151 /*83*/ MatLoad_MPIAIJ, 3152 0, 3153 0, 3154 0, 3155 0, 3156 0, 3157 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3158 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3159 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3160 MatPtAP_Basic, 3161 MatPtAPSymbolic_MPIAIJ, 3162 /*94*/ MatPtAPNumeric_MPIAIJ, 3163 0, 3164 0, 3165 0, 3166 0, 3167 /*99*/ 0, 3168 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3169 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3170 MatConjugate_MPIAIJ, 3171 0, 3172 /*104*/MatSetValuesRow_MPIAIJ, 3173 MatRealPart_MPIAIJ, 3174 MatImaginaryPart_MPIAIJ, 3175 0, 3176 0, 3177 /*109*/0, 3178 MatGetRedundantMatrix_MPIAIJ, 3179 MatGetRowMin_MPIAIJ, 3180 0, 3181 0, 3182 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3183 0, 3184 0, 3185 0, 3186 0, 
3187 /*119*/0, 3188 0, 3189 0, 3190 0, 3191 MatGetMultiProcBlock_MPIAIJ, 3192 /*124*/MatFindNonzeroRows_MPIAIJ, 3193 MatGetColumnNorms_MPIAIJ, 3194 MatInvertBlockDiagonal_MPIAIJ, 3195 0, 3196 MatGetSubMatricesParallel_MPIAIJ, 3197 /*129*/0, 3198 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3199 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3201 0, 3202 /*134*/0, 3203 0, 3204 0, 3205 0, 3206 0 3207 }; 3208 3209 /* ----------------------------------------------------------------------------------------*/ 3210 3211 EXTERN_C_BEGIN 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3214 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3215 { 3216 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3221 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 EXTERN_C_END 3225 3226 EXTERN_C_BEGIN 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3229 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3230 { 3231 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3232 PetscErrorCode ierr; 3233 3234 PetscFunctionBegin; 3235 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3236 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3237 PetscFunctionReturn(0); 3238 } 3239 EXTERN_C_END 3240 3241 EXTERN_C_BEGIN 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3244 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3245 { 3246 Mat_MPIAIJ *b; 3247 PetscErrorCode ierr; 3248 PetscInt i; 3249 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3250 3251 PetscFunctionBegin; 3252 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3253 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3254 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3255 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3256 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3257 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3258 3259 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3260 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3261 if (d_nnz) { 3262 for (i=0; i<B->rmap->n; i++) { 3263 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3264 } 3265 } 3266 if (o_nnz) { 3267 for (i=0; i<B->rmap->n; i++) { 3268 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3269 } 3270 } 3271 b = (Mat_MPIAIJ*)B->data; 3272 3273 if (!B->preallocated) { 3274 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3275 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3276 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3277 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3278 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3280 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3281 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3282 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3283 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3284 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3285 } 3286 3287 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3288 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3289 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3290 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3291 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3292 B->preallocated = PETSC_TRUE; 3293 PetscFunctionReturn(0); 3294 } 3295 EXTERN_C_END 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3299 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3300 { 3301 Mat mat; 3302 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3303 PetscErrorCode ierr; 3304 3305 PetscFunctionBegin; 3306 *newmat = 0; 3307 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3308 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3309 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3310 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3311 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3312 a = (Mat_MPIAIJ*)mat->data; 3313 3314 mat->factortype = matin->factortype; 3315 mat->rmap->bs = matin->rmap->bs; 3316 mat->cmap->bs = matin->cmap->bs; 3317 mat->assembled = PETSC_TRUE; 3318 mat->insertmode = NOT_SET_VALUES; 3319 mat->preallocated = PETSC_TRUE; 3320 3321 a->size = oldmat->size; 3322 a->rank = oldmat->rank; 3323 a->donotstash = oldmat->donotstash; 3324 a->roworiented = oldmat->roworiented; 3325 a->rowindices = 0; 3326 a->rowvalues = 0; 3327 a->getrowactive = PETSC_FALSE; 3328 3329 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3330 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3331 3332 if (oldmat->colmap) { 3333 #if defined (PETSC_USE_CTABLE) 3334 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3335 #else 3336 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3337 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3339 #endif 3340 } else a->colmap = 0; 3341 if (oldmat->garray) { 3342 PetscInt len; 3343 len = oldmat->B->cmap->n; 3344 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3345 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3346 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3347 } else a->garray = 0; 3348 3349 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3350 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3351 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3352 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3353 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3354 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3355 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3356 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3357 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3358 *newmat = mat; 3359 PetscFunctionReturn(0); 3360 } 3361 3362 3363 3364 #undef __FUNCT__ 3365 #define __FUNCT__ "MatLoad_MPIAIJ" 3366 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3367 { 3368 PetscScalar *vals,*svals; 3369 MPI_Comm comm = ((PetscObject)viewer)->comm; 3370 PetscErrorCode ierr; 3371 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3372 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3373 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3374 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3375 PetscInt cend,cstart,n,*rowners,sizesset=1; 3376 int fd; 3377 PetscInt bs = 1; 3378 3379 PetscFunctionBegin; 3380 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3381 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3382 if (!rank) { 3383 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3384 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3385 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3386 } 3387 3388 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3389 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3390 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3391 3392 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3393 3394 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3395 M = header[1]; N = header[2]; 3396 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3397 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3398 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3399 3400 /* If global sizes are set, check if they are consistent with that given in the file */ 3401 if (sizesset) { 3402 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3403 } 3404 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3405 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3406 3407 /* determine ownership of all (block) rows */ 3408 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3409 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3410 else m = newMat->rmap->n; /* Set by user */ 3411 3412 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3413 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3414 3415 /* First process needs enough room for process with most rows */ 3416 if (!rank) { 3417 mmax = rowners[1]; 3418 for (i=2; i<=size; i++) { 3419 
mmax = PetscMax(mmax, rowners[i]); 3420 } 3421 } else mmax = m; 3422 3423 rowners[0] = 0; 3424 for (i=2; i<=size; i++) { 3425 rowners[i] += rowners[i-1]; 3426 } 3427 rstart = rowners[rank]; 3428 rend = rowners[rank+1]; 3429 3430 /* distribute row lengths to all processors */ 3431 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3432 if (!rank) { 3433 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3434 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3435 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3436 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3437 for (j=0; j<m; j++) { 3438 procsnz[0] += ourlens[j]; 3439 } 3440 for (i=1; i<size; i++) { 3441 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3442 /* calculate the number of nonzeros on each processor */ 3443 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3444 procsnz[i] += rowlengths[j]; 3445 } 3446 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3449 } else { 3450 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3451 } 3452 3453 if (!rank) { 3454 /* determine max buffer needed and allocate it */ 3455 maxnz = 0; 3456 for (i=0; i<size; i++) { 3457 maxnz = PetscMax(maxnz,procsnz[i]); 3458 } 3459 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3460 3461 /* read in my part of the matrix column indices */ 3462 nz = procsnz[0]; 3463 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3464 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3465 3466 /* read in every one elses and ship off */ 3467 for (i=1; i<size; i++) { 3468 nz = procsnz[i]; 3469 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3470 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3471 } 3472 ierr = PetscFree(cols);CHKERRQ(ierr); 3473 } else { 3474 /* determine buffer space needed for message */ 3475 nz = 0; 3476 for (i=0; i<m; i++) { 3477 nz += ourlens[i]; 3478 } 3479 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3480 3481 /* receive message of column indices*/ 3482 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3483 } 3484 3485 /* determine column ownership if matrix is not square */ 3486 if (N != M) { 3487 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3488 else n = newMat->cmap->n; 3489 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3490 cstart = cend - n; 3491 } else { 3492 cstart = rstart; 3493 cend = rend; 3494 n = cend - cstart; 3495 } 3496 3497 /* loop over local rows, determining number of off diagonal entries */ 3498 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3499 jj = 0; 3500 for (i=0; i<m; i++) { 3501 for (j=0; j<ourlens[i]; j++) { 3502 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3503 jj++; 3504 } 3505 } 3506 3507 for (i=0; i<m; i++) { 3508 ourlens[i] -= offlens[i]; 3509 } 3510 if (!sizesset) { 3511 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3512 } 3513 3514 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3515 3516 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3517 3518 for (i=0; i<m; i++) { 3519 ourlens[i] += offlens[i]; 3520 } 3521 3522 if (!rank) { 3523 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3524 3525 /* read in my part of the matrix numerical values */ 
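    /* rank 0 reads the values in per-process chunks: first its own block below, then each
       remote process's block inside the loop, shipping it immediately with MPIULong_Send();
       every other rank receives its block with MPIULong_Recv() in the else branch */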
3526 nz = procsnz[0]; 3527 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3528 3529 /* insert into matrix */ 3530 jj = rstart; 3531 smycols = mycols; 3532 svals = vals; 3533 for (i=0; i<m; i++) { 3534 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3535 smycols += ourlens[i]; 3536 svals += ourlens[i]; 3537 jj++; 3538 } 3539 3540 /* read in other processors and ship out */ 3541 for (i=1; i<size; i++) { 3542 nz = procsnz[i]; 3543 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3544 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3545 } 3546 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3547 } else { 3548 /* receive numeric values */ 3549 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3550 3551 /* receive message of values*/ 3552 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3553 3554 /* insert into matrix */ 3555 jj = rstart; 3556 smycols = mycols; 3557 svals = vals; 3558 for (i=0; i<m; i++) { 3559 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3560 smycols += ourlens[i]; 3561 svals += ourlens[i]; 3562 jj++; 3563 } 3564 } 3565 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3566 ierr = PetscFree(vals);CHKERRQ(ierr); 3567 ierr = PetscFree(mycols);CHKERRQ(ierr); 3568 ierr = PetscFree(rowners);CHKERRQ(ierr); 3569 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3570 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3571 3572 PetscFunctionReturn(0); 3573 } 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3577 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3578 { 3579 PetscErrorCode ierr; 3580 IS iscol_local; 3581 PetscInt csize; 3582 3583 PetscFunctionBegin; 3584 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3585 if (call == MAT_REUSE_MATRIX) { 3586 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3587 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3588 } else { 3589 PetscInt cbs; 3590 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3591 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3592 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3593 } 3594 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3595 if (call == MAT_INITIAL_MATRIX) { 3596 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3597 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3598 } 3599 PetscFunctionReturn(0); 3600 } 3601 3602 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3603 #undef __FUNCT__ 3604 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3605 /* 3606 Not great since it makes two copies of the submatrix, first an SeqAIJ 3607 in local and then by concatenating the local matrices the end result. 3608 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3609 3610 Note: This requires a sequential iscol with all indices. 
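          (The caller MatGetSubMatrix_MPIAIJ() above obtains such an IS by gathering iscol
           across the communicator with ISAllGather().)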
3611 */ 3612 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3613 { 3614 PetscErrorCode ierr; 3615 PetscMPIInt rank,size; 3616 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3617 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3618 PetscBool allcolumns, colflag; 3619 Mat M,Mreuse; 3620 MatScalar *vwork,*aa; 3621 MPI_Comm comm = ((PetscObject)mat)->comm; 3622 Mat_SeqAIJ *aij; 3623 3624 3625 PetscFunctionBegin; 3626 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3628 3629 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3630 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3631 if (colflag && ncol == mat->cmap->N){ 3632 allcolumns = PETSC_TRUE; 3633 } else { 3634 allcolumns = PETSC_FALSE; 3635 } 3636 if (call == MAT_REUSE_MATRIX) { 3637 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3638 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3639 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3640 } else { 3641 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3642 } 3643 3644 /* 3645 m - number of local rows 3646 n - number of columns (same on all processors) 3647 rstart - first row in new global matrix generated 3648 */ 3649 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3650 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3651 if (call == MAT_INITIAL_MATRIX) { 3652 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3653 ii = aij->i; 3654 jj = aij->j; 3655 3656 /* 3657 Determine the number of non-zeros in the diagonal and off-diagonal 3658 portions of the matrix in order to do correct preallocation 3659 */ 3660 3661 /* first get start and end of "diagonal" columns */ 3662 if (csize == PETSC_DECIDE) { 3663 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3664 if (mglobal == n) { /* square matrix */ 3665 nlocal = m; 3666 } else { 3667 nlocal = n/size + ((n % size) > rank); 3668 } 3669 } else { 3670 nlocal = csize; 3671 } 3672 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3673 rstart = rend - nlocal; 3674 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3675 3676 /* next, compute all the lengths */ 3677 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3678 olens = dlens + m; 3679 for (i=0; i<m; i++) { 3680 jend = ii[i+1] - ii[i]; 3681 olen = 0; 3682 dlen = 0; 3683 for (j=0; j<jend; j++) { 3684 if (*jj < rstart || *jj >= rend) olen++; 3685 else dlen++; 3686 jj++; 3687 } 3688 olens[i] = olen; 3689 dlens[i] = dlen; 3690 } 3691 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3692 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3693 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3694 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3695 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3696 ierr = PetscFree(dlens);CHKERRQ(ierr); 3697 } else { 3698 PetscInt ml,nl; 3699 3700 M = *newmat; 3701 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3702 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3703 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3704 /* 3705 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the submatrix used on this processor for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
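
/*
   Typical use of the public interface defined next (a minimal sketch; comm, the local
   sizes m and n, and the CSR arrays i, j, v are assumed to have been built by the caller):

      Mat B;
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);

   The routine both preallocates and inserts the values; the implementation above already
   calls MatAssemblyBegin()/MatAssemblyEnd(), so the caller need not assemble again.
*/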
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
3881 For matrices that will be factored, you must leave room for (and set) 3882 the diagonal entry even if it is zero. 3883 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3884 submatrix (same value is used for all local rows). 3885 - o_nnz - array containing the number of nonzeros in the various rows of the 3886 OFF-DIAGONAL portion of the local submatrix (possibly different for 3887 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3888 structure. The size of this array is equal to the number 3889 of local rows, i.e 'm'. 3890 3891 If the *_nnz parameter is given then the *_nz parameter is ignored 3892 3893 The AIJ format (also called the Yale sparse matrix format or 3894 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3895 storage. The stored row and column indices begin with zero. 3896 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3897 3898 The parallel matrix is partitioned such that the first m0 rows belong to 3899 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3900 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3901 3902 The DIAGONAL portion of the local submatrix of a processor can be defined 3903 as the submatrix which is obtained by extraction the part corresponding to 3904 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3905 first row that belongs to the processor, r2 is the last row belonging to 3906 the this processor, and c1-c2 is range of indices of the local part of a 3907 vector suitable for applying the matrix to. This is an mxn matrix. In the 3908 common case of a square matrix, the row and column ranges are the same and 3909 the DIAGONAL part is also square. The remaining portion of the local 3910 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3911 3912 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3913 3914 You can call MatGetInfo() to get information on how effective the preallocation was; 3915 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3916 You can also run with the option -info and look for messages with the string 3917 malloc in them to see if additional memory allocation was needed. 3918 3919 Example usage: 3920 3921 Consider the following 8x8 matrix with 34 non-zero values, that is 3922 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3923 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3924 as follows: 3925 3926 .vb 3927 1 2 0 | 0 3 0 | 0 4 3928 Proc0 0 5 6 | 7 0 0 | 8 0 3929 9 0 10 | 11 0 0 | 12 0 3930 ------------------------------------- 3931 13 0 14 | 15 16 17 | 0 0 3932 Proc1 0 18 0 | 19 20 21 | 0 0 3933 0 0 0 | 22 23 0 | 24 0 3934 ------------------------------------- 3935 Proc2 25 26 27 | 0 0 28 | 29 0 3936 30 0 0 | 31 32 33 | 0 34 3937 .ve 3938 3939 This can be represented as a collection of submatrices as: 3940 3941 .vb 3942 A B C 3943 D E F 3944 G H I 3945 .ve 3946 3947 Where the submatrices A,B,C are owned by proc0, D,E,F are 3948 owned by proc1, G,H,I are owned by proc2. 3949 3950 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3951 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3952 The 'M','N' parameters are 8,8, and have the same values on all procs. 3953 3954 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3955 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3956 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3957 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3958 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3959 matrix, ans [DF] as another SeqAIJ matrix. 3960 3961 When d_nz, o_nz parameters are specified, d_nz storage elements are 3962 allocated for every row of the local diagonal submatrix, and o_nz 3963 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3964 One way to choose d_nz and o_nz is to use the max nonzerors per local 3965 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3966 In this case, the values of d_nz,o_nz are: 3967 .vb 3968 proc0 : dnz = 2, o_nz = 2 3969 proc1 : dnz = 3, o_nz = 2 3970 proc2 : dnz = 1, o_nz = 4 3971 .ve 3972 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3973 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3974 for proc3. i.e we are using 12+15+10=37 storage locations to store 3975 34 values. 3976 3977 When d_nnz, o_nnz parameters are specified, the storage is specified 3978 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3979 In the above case the values for d_nnz,o_nnz are: 3980 .vb 3981 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3982 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3983 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3984 .ve 3985 Here the space allocated is sum of all the above values i.e 34, and 3986 hence pre-allocation is perfect. 3987 3988 Level: intermediate 3989 3990 .keywords: matrix, aij, compressed row, sparse, parallel 3991 3992 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3993 MPIAIJ, MatGetInfo() 3994 @*/ 3995 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3996 { 3997 PetscErrorCode ierr; 3998 3999 PetscFunctionBegin; 4000 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4001 PetscValidType(B,1); 4002 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4003 PetscFunctionReturn(0); 4004 } 4005 4006 #undef __FUNCT__ 4007 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4008 /*@ 4009 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4010 CSR format the local rows. 4011 4012 Collective on MPI_Comm 4013 4014 Input Parameters: 4015 + comm - MPI communicator 4016 . m - number of local rows (Cannot be PETSC_DECIDE) 4017 . n - This value should be the same as the local size used in creating the 4018 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4019 calculated if N is given) For square matrices n is almost always m. 4020 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4021 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4022 . i - row indices 4023 . j - column indices 4024 - a - matrix values 4025 4026 Output Parameter: 4027 . mat - the matrix 4028 4029 Level: intermediate 4030 4031 Notes: 4032 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4033 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4034 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
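
       For example, once a process has built the CSR triple (i, j, a) for the rows it owns,
     a typical call (a minimal sketch; comm, m, n and the arrays are whatever the caller has
     constructed) is
.vb
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve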
       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
4106 - o_nnz - array containing the number of nonzeros in the various rows of the 4107 OFF-DIAGONAL portion of the local submatrix (possibly different for 4108 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4109 structure. The size of this array is equal to the number 4110 of local rows, i.e 'm'. 4111 4112 Output Parameter: 4113 . A - the matrix 4114 4115 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4116 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4117 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4118 4119 Notes: 4120 If the *_nnz parameter is given then the *_nz parameter is ignored 4121 4122 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4123 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4124 storage requirements for this matrix. 4125 4126 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4127 processor than it must be used on all processors that share the object for 4128 that argument. 4129 4130 The user MUST specify either the local or global matrix dimensions 4131 (possibly both). 4132 4133 The parallel matrix is partitioned across processors such that the 4134 first m0 rows belong to process 0, the next m1 rows belong to 4135 process 1, the next m2 rows belong to process 2 etc.. where 4136 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4137 values corresponding to [m x N] submatrix. 4138 4139 The columns are logically partitioned with the n0 columns belonging 4140 to 0th partition, the next n1 columns belonging to the next 4141 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4142 4143 The DIAGONAL portion of the local submatrix on any given processor 4144 is the submatrix corresponding to the rows and columns m,n 4145 corresponding to the given processor. i.e diagonal matrix on 4146 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4147 etc. The remaining portion of the local submatrix [m x (N-n)] 4148 constitute the OFF-DIAGONAL portion. The example below better 4149 illustrates this concept. 4150 4151 For a square global matrix we define each processor's diagonal portion 4152 to be its local rows and the corresponding columns (a square submatrix); 4153 each processor's off-diagonal portion encompasses the remainder of the 4154 local matrix (a rectangular submatrix). 4155 4156 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4157 4158 When calling this routine with a single process communicator, a matrix of 4159 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4160 type of communicator, use the construction mechanism: 4161 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4162 4163 By default, this format uses inodes (identical nodes) when possible. 4164 We search for consecutive rows with the same nonzero structure, thereby 4165 reusing matrix information to achieve increased efficiency. 4166 4167 Options Database Keys: 4168 + -mat_no_inode - Do not use inodes 4169 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4170 - -mat_aij_oneindex - Internally use indexing starting at 1 4171 rather than 0. Note that when calling MatSetValues(), 4172 the user still MUST index entries starting at 0! 4173 4174 4175 Example usage: 4176 4177 Consider the following 8x8 matrix with 34 non-zero values, that is 4178 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4179 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4180 as follows: 4181 4182 .vb 4183 1 2 0 | 0 3 0 | 0 4 4184 Proc0 0 5 6 | 7 0 0 | 8 0 4185 9 0 10 | 11 0 0 | 12 0 4186 ------------------------------------- 4187 13 0 14 | 15 16 17 | 0 0 4188 Proc1 0 18 0 | 19 20 21 | 0 0 4189 0 0 0 | 22 23 0 | 24 0 4190 ------------------------------------- 4191 Proc2 25 26 27 | 0 0 28 | 29 0 4192 30 0 0 | 31 32 33 | 0 34 4193 .ve 4194 4195 This can be represented as a collection of submatrices as: 4196 4197 .vb 4198 A B C 4199 D E F 4200 G H I 4201 .ve 4202 4203 Where the submatrices A,B,C are owned by proc0, D,E,F are 4204 owned by proc1, G,H,I are owned by proc2. 4205 4206 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4207 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4208 The 'M','N' parameters are 8,8, and have the same values on all procs. 4209 4210 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4211 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4212 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4213 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4214 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4215 matrix, ans [DF] as another SeqAIJ matrix. 4216 4217 When d_nz, o_nz parameters are specified, d_nz storage elements are 4218 allocated for every row of the local diagonal submatrix, and o_nz 4219 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4220 One way to choose d_nz and o_nz is to use the max nonzerors per local 4221 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4222 In this case, the values of d_nz,o_nz are: 4223 .vb 4224 proc0 : dnz = 2, o_nz = 2 4225 proc1 : dnz = 3, o_nz = 2 4226 proc2 : dnz = 1, o_nz = 4 4227 .ve 4228 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4229 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4230 for proc3. i.e we are using 12+15+10=37 storage locations to store 4231 34 values. 4232 4233 When d_nnz, o_nnz parameters are specified, the storage is specified 4234 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4235 In the above case the values for d_nnz,o_nnz are: 4236 .vb 4237 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4238 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4239 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4240 .ve 4241 Here the space allocated is sum of all the above values i.e 34, and 4242 hence pre-allocation is perfect. 
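
   A minimal calling sketch (the communicator, the local sizes and the d_nnz/o_nnz
   arrays are placeholders the caller supplies):
.vb
     Mat A;
     ierr = MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ... insert entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve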
4243 4244 Level: intermediate 4245 4246 .keywords: matrix, aij, compressed row, sparse, parallel 4247 4248 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4249 MPIAIJ, MatCreateMPIAIJWithArrays() 4250 @*/ 4251 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4252 { 4253 PetscErrorCode ierr; 4254 PetscMPIInt size; 4255 4256 PetscFunctionBegin; 4257 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4258 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4259 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4260 if (size > 1) { 4261 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4262 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4263 } else { 4264 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4265 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4266 } 4267 PetscFunctionReturn(0); 4268 } 4269 4270 #undef __FUNCT__ 4271 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4272 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4273 { 4274 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4275 4276 PetscFunctionBegin; 4277 *Ad = a->A; 4278 *Ao = a->B; 4279 *colmap = a->garray; 4280 PetscFunctionReturn(0); 4281 } 4282 4283 #undef __FUNCT__ 4284 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4285 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4286 { 4287 PetscErrorCode ierr; 4288 PetscInt i; 4289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4290 4291 PetscFunctionBegin; 4292 if (coloring->ctype == IS_COLORING_GLOBAL) { 4293 ISColoringValue *allcolors,*colors; 4294 ISColoring ocoloring; 4295 4296 /* set coloring for diagonal portion */ 4297 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4298 4299 /* set coloring for off-diagonal portion */ 4300 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4301 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4302 for (i=0; i<a->B->cmap->n; i++) { 4303 colors[i] = allcolors[a->garray[i]]; 4304 } 4305 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4306 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4307 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4308 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4309 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4310 ISColoringValue *colors; 4311 PetscInt *larray; 4312 ISColoring ocoloring; 4313 4314 /* set coloring for diagonal portion */ 4315 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4316 for (i=0; i<a->A->cmap->n; i++) { 4317 larray[i] = i + A->cmap->rstart; 4318 } 4319 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4320 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4321 for (i=0; i<a->A->cmap->n; i++) { 4322 colors[i] = coloring->colors[larray[i]]; 4323 } 4324 ierr = PetscFree(larray);CHKERRQ(ierr); 4325 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4326 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4327 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4328 4329 /* set coloring for off-diagonal portion */ 4330 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4331 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4332 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4333 for (i=0; i<a->B->cmap->n; i++) { 4334 colors[i] = coloring->colors[larray[i]]; 4335 } 4336 ierr = PetscFree(larray);CHKERRQ(ierr); 4337 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4338 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4339 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4340 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4341 4342 PetscFunctionReturn(0); 4343 } 4344 4345 #if defined(PETSC_HAVE_ADIC) 4346 #undef __FUNCT__ 4347 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4348 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4349 { 4350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4351 PetscErrorCode ierr; 4352 4353 PetscFunctionBegin; 4354 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4355 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 #endif 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4362 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4363 { 4364 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4365 PetscErrorCode ierr; 4366 4367 PetscFunctionBegin; 4368 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4369 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4375 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4376 { 4377 PetscErrorCode ierr; 4378 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4379 PetscInt *indx; 4380 4381 PetscFunctionBegin; 4382 /* This routine will ONLY return MPIAIJ type matrix */ 4383 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4384 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4385 if (n == PETSC_DECIDE){ 4386 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4387 } 4388 /* Check sum(n) = N */ 4389 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4390 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4391 4392 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4393 rstart -= m; 4394 4395 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4396 for (i=0;i<m;i++) { 4397 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4398 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4399 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4400 } 4401 4402 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4403 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4404 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4405 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4406 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4407 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 4411 #undef __FUNCT__ 4412 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4413 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4414 { 4415 PetscErrorCode ierr; 4416 PetscInt 
m,N,i,rstart,nnz,Ii; 4417 PetscInt *indx; 4418 PetscScalar *values; 4419 4420 PetscFunctionBegin; 4421 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4422 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4423 for (i=0;i<m;i++) { 4424 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4425 Ii = i + rstart; 4426 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4427 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4428 } 4429 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4430 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4431 PetscFunctionReturn(0); 4432 } 4433 4434 #undef __FUNCT__ 4435 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4436 /*@ 4437 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4438 matrices from each processor 4439 4440 Collective on MPI_Comm 4441 4442 Input Parameters: 4443 + comm - the communicators the parallel matrix will live on 4444 . inmat - the input sequential matrices 4445 . n - number of local columns (or PETSC_DECIDE) 4446 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4447 4448 Output Parameter: 4449 . outmat - the parallel matrix generated 4450 4451 Level: advanced 4452 4453 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4454 4455 @*/ 4456 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4457 { 4458 PetscErrorCode ierr; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4462 if (scall == MAT_INITIAL_MATRIX){ 4463 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4464 } 4465 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4466 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4467 PetscFunctionReturn(0); 4468 } 4469 4470 #undef __FUNCT__ 4471 #define __FUNCT__ "MatFileSplit" 4472 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4473 { 4474 PetscErrorCode ierr; 4475 PetscMPIInt rank; 4476 PetscInt m,N,i,rstart,nnz; 4477 size_t len; 4478 const PetscInt *indx; 4479 PetscViewer out; 4480 char *name; 4481 Mat B; 4482 const PetscScalar *values; 4483 4484 PetscFunctionBegin; 4485 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4486 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4487 /* Should this be the type of the diagonal block of A? 
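     For an MPIAIJ matrix the on-process diagonal block is stored as a MATSEQAIJ matrix,
     so MATSEQAIJ is a reasonable default here.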
*/ 4488 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4489 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4490 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4491 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4492 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4493 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4494 for (i=0;i<m;i++) { 4495 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4496 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4497 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4498 } 4499 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4501 4502 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4503 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4504 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4505 sprintf(name,"%s.%d",outfile,rank); 4506 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4507 ierr = PetscFree(name); 4508 ierr = MatView(B,out);CHKERRQ(ierr); 4509 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4510 ierr = MatDestroy(&B);CHKERRQ(ierr); 4511 PetscFunctionReturn(0); 4512 } 4513 4514 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4515 #undef __FUNCT__ 4516 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4517 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4518 { 4519 PetscErrorCode ierr; 4520 Mat_Merge_SeqsToMPI *merge; 4521 PetscContainer container; 4522 4523 PetscFunctionBegin; 4524 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4525 if (container) { 4526 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4539 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4540 ierr = PetscFree(merge);CHKERRQ(ierr); 4541 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4542 } 4543 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4544 PetscFunctionReturn(0); 4545 } 4546 4547 #include <../src/mat/utils/freespace.h> 4548 #include <petscbt.h> 4549 4550 #undef __FUNCT__ 4551 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4552 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4553 { 4554 PetscErrorCode ierr; 4555 MPI_Comm comm=((PetscObject)mpimat)->comm; 4556 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4557 PetscMPIInt size,rank,taga,*len_s; 4558 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4559 PetscInt proc,m; 4560 PetscInt **buf_ri,**buf_rj; 4561 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4562 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4563 MPI_Request *s_waits,*r_waits; 4564 MPI_Status *status; 4565 MatScalar *aa=a->a; 4566 MatScalar **abuf_r,*ba_i; 4567 Mat_Merge_SeqsToMPI *merge; 4568 PetscContainer container; 4569 4570 PetscFunctionBegin; 4571 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4572 4573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4574 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4575 4576 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4577 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4578 4579 bi = merge->bi; 4580 bj = merge->bj; 4581 buf_ri = merge->buf_ri; 4582 buf_rj = merge->buf_rj; 4583 4584 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4585 owners = merge->rowmap->range; 4586 len_s = merge->len_s; 4587 4588 /* send and recv matrix values */ 4589 /*-----------------------------*/ 4590 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4591 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4592 4593 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4594 for (proc=0,k=0; proc<size; proc++){ 4595 if (!len_s[proc]) continue; 4596 i = owners[proc]; 4597 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4598 k++; 4599 } 4600 4601 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4602 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4603 ierr = PetscFree(status);CHKERRQ(ierr); 4604 4605 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4606 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4607 4608 /* insert mat values of mpimat */ 4609 /*----------------------------*/ 4610 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4611 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4612 4613 for (k=0; k<merge->nrecv; k++){ 4614 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4615 nrows = *(buf_ri_k[k]); 4616 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4617 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4618 } 4619 4620 /* set values of ba */ 4621 m = merge->rowmap->n; 4622 for (i=0; i<m; i++) { 4623 arow = owners[rank] + i; 4624 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4625 bnzi = bi[i+1] - bi[i]; 4626 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4627 4628 /* add local non-zero vals of this proc's seqmat into ba */ 4629 anzi = ai[arow+1] - ai[arow]; 4630 aj = a->j + ai[arow]; 4631 aa = a->a + ai[arow]; 4632 nextaj = 0; 4633 for (j=0; nextaj<anzi; j++){ 4634 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4635 ba_i[j] += aa[nextaj++]; 4636 } 4637 } 4638 4639 /* add received vals into ba */ 4640 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4641 /* i-th row */ 4642 if (i == *nextrow[k]) { 4643 anzi = *(nextai[k]+1) - *nextai[k]; 4644 aj = buf_rj[k] + *(nextai[k]); 4645 aa = abuf_r[k] + *(nextai[k]); 4646 nextaj = 0; 4647 for (j=0; nextaj<anzi; j++){ 4648 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4649 ba_i[j] += aa[nextaj++]; 4650 } 4651 } 4652 nextrow[k]++; nextai[k]++; 4653 } 4654 } 4655 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4656 } 4657 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4659 4660 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4661 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4662 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4663 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4664 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4665 PetscFunctionReturn(0); 4666 } 4667 4668 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4669 4670 #undef __FUNCT__ 4671 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4672 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4673 { 4674 PetscErrorCode ierr; 4675 Mat B_mpi; 4676 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4677 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4678 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4679 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4680 PetscInt len,proc,*dnz,*onz,bs,cbs; 4681 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4682 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4683 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4684 MPI_Status *status; 4685 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4686 PetscBT lnkbt; 4687 Mat_Merge_SeqsToMPI *merge; 4688 PetscContainer container; 4689 4690 PetscFunctionBegin; 4691 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4692 4693 /* make sure it is a PETSc comm */ 4694 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4695 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4697 4698 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4699 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4700 4701 /* determine row ownership */ 4702 /*---------------------------------------------------------*/ 4703 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4704 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4705 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4706 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4707 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4708 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4709 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4710 4711 m = merge->rowmap->n; 4712 M = merge->rowmap->N; 4713 owners = merge->rowmap->range; 4714 4715 /* determine the number of messages to send, their lengths */ 4716 /*---------------------------------------------------------*/ 4717 len_s = merge->len_s; 4718 4719 len = 0; /* length of buf_si[] */ 4720 merge->nsend = 0; 4721 for (proc=0; proc<size; proc++){ 4722 len_si[proc] = 0; 4723 if (proc == rank){ 4724 len_s[proc] = 0; 4725 } else { 4726 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4727 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4728 } 4729 if (len_s[proc]) { 4730 merge->nsend++; 4731 nrows = 0; 4732 for (i=owners[proc]; i<owners[proc+1]; i++){ 4733 if (ai[i+1] > ai[i]) nrows++; 4734 } 4735 len_si[proc] = 2*(nrows+1); 4736 len += len_si[proc]; 4737 } 4738 } 4739 4740 /* determine the number and length of messages to receive for ij-structure */ 4741 /*-------------------------------------------------------------------------*/ 4742 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4743 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4744 4745 /* post the Irecv of j-structure */ 4746 /*-------------------------------*/ 4747 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4748 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4749 4750 /* post the Isend of j-structure */ 4751 /*--------------------------------*/ 4752 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4753 4754 for (proc=0, k=0; proc<size; proc++){ 4755 if (!len_s[proc]) continue; 4756 i = owners[proc]; 4757 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4758 k++; 4759 } 4760 4761 /* receives and sends of j-structure are complete */ 4762 /*------------------------------------------------*/ 4763 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4764 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4765 4766 /* send and recv i-structure */ 4767 /*---------------------------*/ 4768 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4769 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4770 4771 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4772 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4773 for (proc=0,k=0; proc<size; proc++){ 4774 if (!len_s[proc]) continue; 4775 /* form outgoing message for i-structure: 4776 buf_si[0]: nrows to be sent 4777 [1:nrows]: row index (global) 4778 [nrows+1:2*nrows+1]: i-structure index 4779 */ 4780 /*-------------------------------------------*/ 4781 nrows = len_si[proc]/2 - 1; 4782 buf_si_i = buf_si + nrows+1; 4783 buf_si[0] = nrows; 4784 buf_si_i[0] = 0; 4785 nrows = 0; 4786 for (i=owners[proc]; i<owners[proc+1]; i++){ 4787 anzi = ai[i+1] - ai[i]; 4788 if (anzi) { 4789 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4790 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4791 nrows++; 4792 } 4793 } 4794 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4795 k++; 4796 buf_si += len_si[proc]; 4797 } 4798 4799 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4800 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4801 4802 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4803 for (i=0; i<merge->nrecv; i++){ 4804 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4805 } 4806 4807 ierr = PetscFree(len_si);CHKERRQ(ierr); 4808 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4809 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4810 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4811 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4812 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4813 ierr = PetscFree(status);CHKERRQ(ierr); 4814 4815 /* compute a local seq matrix in each processor */ 4816 /*----------------------------------------------*/ 4817 /* allocate bi array and free space for accumulating nonzero column info */ 4818 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4819 bi[0] = 0; 4820 4821 /* create and initialize a linked list */ 4822 nlnk = N+1; 4823 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4824 4825 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4826 len = 0; 4827 len = ai[owners[rank+1]] - ai[owners[rank]]; 4828 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4829 current_space = free_space; 4830 4831 /* determine 
symbolic info for each local row */
4832   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4833
4834   for (k=0; k<merge->nrecv; k++){
4835     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4836     nrows = *buf_ri_k[k];
4837     nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4838     nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4839   }
4840
4841   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4842   len = 0;
4843   for (i=0;i<m;i++) {
4844     bnzi = 0;
4845     /* add local non-zero cols of this proc's seqmat into lnk */
4846     arow = owners[rank] + i;
4847     anzi = ai[arow+1] - ai[arow];
4848     aj = a->j + ai[arow];
4849     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4850     bnzi += nlnk;
4851     /* add received col data into lnk */
4852     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4853       if (i == *nextrow[k]) { /* i-th row */
4854         anzi = *(nextai[k]+1) - *nextai[k];
4855         aj = buf_rj[k] + *nextai[k];
4856         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4857         bnzi += nlnk;
4858         nextrow[k]++; nextai[k]++;
4859       }
4860     }
4861     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4862
4863     /* if free space is not available, make more free space */
4864     if (current_space->local_remaining<bnzi) {
4865       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4866       nspacedouble++;
4867     }
4868     /* copy data into free space, then initialize lnk */
4869     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4870     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4871
4872     current_space->array += bnzi;
4873     current_space->local_used += bnzi;
4874     current_space->local_remaining -= bnzi;
4875
4876     bi[i+1] = bi[i] + bnzi;
4877   }
4878
4879   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4880
4881   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4882   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4883   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4884
4885   /* create symbolic parallel matrix B_mpi */
4886   /*---------------------------------------*/
4887   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4888   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4889   if (n==PETSC_DECIDE) {
4890     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4891   } else {
4892     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4893   }
4894   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4895   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4896   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4897   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4898   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4899
4900   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4901   B_mpi->assembled = PETSC_FALSE;
4902   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4903   merge->bi = bi;
4904   merge->bj = bj;
4905   merge->buf_ri = buf_ri;
4906   merge->buf_rj = buf_rj;
4907   merge->coi = PETSC_NULL;
4908   merge->coj = PETSC_NULL;
4909   merge->owners_co = PETSC_NULL;
4910
4911   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4912
4913   /* attach the supporting struct to B_mpi for reuse */
4914   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4915   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4916   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4917   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4918   *mpimat = B_mpi;
4919
4920   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4921   PetscFunctionReturn(0);
4922 }
4923
4924 #undef __FUNCT__
4925 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4926 /*@C
4927       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4928     matrices from each processor
4929
4930    Collective on MPI_Comm
4931
4932    Input Parameters:
4933 +    comm - the communicator the parallel matrix will live on
4934 .    seqmat - the input sequential matrix (each process supplies its own)
4935 .    m - number of local rows (or PETSC_DECIDE)
4936 .    n - number of local columns (or PETSC_DECIDE)
4937 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4938
4939    Output Parameter:
4940 .    mpimat - the parallel matrix generated
4941
4942     Level: advanced
4943
4944    Notes:
4945      The dimensions of the sequential matrix in each processor MUST be the same.
4946      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
4947      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4948 @*/
4949 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4950 {
4951   PetscErrorCode ierr;
4952   PetscMPIInt size;
4953
4954   PetscFunctionBegin;
4955   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4956   if (size == 1){
4957     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4958     if (scall == MAT_INITIAL_MATRIX){
4959       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4960     } else {
4961       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4962     }
4963     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964     PetscFunctionReturn(0);
4965   }
4966   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967   if (scall == MAT_INITIAL_MATRIX){
4968     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4969   }
4970   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4971   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4972   PetscFunctionReturn(0);
4973 }
4974
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4977 /*@
4978      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4979           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4980           with MatGetSize()
4981
4982     Not Collective
4983
4984    Input Parameters:
4985 +    A - the matrix
4986 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4987
4988    Output Parameter:
4989 .
A_loc - the local sequential matrix generated 4990 4991 Level: developer 4992 4993 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4994 4995 @*/ 4996 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4997 { 4998 PetscErrorCode ierr; 4999 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5000 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5001 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5002 MatScalar *aa=a->a,*ba=b->a,*cam; 5003 PetscScalar *ca; 5004 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5005 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5006 PetscBool match; 5007 5008 PetscFunctionBegin; 5009 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5010 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5011 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5012 if (scall == MAT_INITIAL_MATRIX){ 5013 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5014 ci[0] = 0; 5015 for (i=0; i<am; i++){ 5016 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5017 } 5018 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5019 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5020 k = 0; 5021 for (i=0; i<am; i++) { 5022 ncols_o = bi[i+1] - bi[i]; 5023 ncols_d = ai[i+1] - ai[i]; 5024 /* off-diagonal portion of A */ 5025 for (jo=0; jo<ncols_o; jo++) { 5026 col = cmap[*bj]; 5027 if (col >= cstart) break; 5028 cj[k] = col; bj++; 5029 ca[k++] = *ba++; 5030 } 5031 /* diagonal portion of A */ 5032 for (j=0; j<ncols_d; j++) { 5033 cj[k] = cstart + *aj++; 5034 ca[k++] = *aa++; 5035 } 5036 /* off-diagonal portion of A */ 5037 for (j=jo; j<ncols_o; j++) { 5038 cj[k] = cmap[*bj++]; 5039 ca[k++] = *ba++; 5040 } 5041 } 5042 /* put together the new matrix */ 5043 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5044 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5045 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5046 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5047 mat->free_a = PETSC_TRUE; 5048 mat->free_ij = PETSC_TRUE; 5049 mat->nonew = 0; 5050 } else if (scall == MAT_REUSE_MATRIX){ 5051 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5052 ci = mat->i; cj = mat->j; cam = mat->a; 5053 for (i=0; i<am; i++) { 5054 /* off-diagonal portion of A */ 5055 ncols_o = bi[i+1] - bi[i]; 5056 for (jo=0; jo<ncols_o; jo++) { 5057 col = cmap[*bj]; 5058 if (col >= cstart) break; 5059 *cam++ = *ba++; bj++; 5060 } 5061 /* diagonal portion of A */ 5062 ncols_d = ai[i+1] - ai[i]; 5063 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5064 /* off-diagonal portion of A */ 5065 for (j=jo; j<ncols_o; j++) { 5066 *cam++ = *ba++; bj++; 5067 } 5068 } 5069 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5070 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 5074 #undef __FUNCT__ 5075 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5076 /*@C 5077 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5078 5079 Not Collective 5080 5081 Input Parameters: 5082 + A - the matrix 5083 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5084 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5085 5086 Output Parameter: 5087 . 
A_loc - the local sequential matrix generated
5088
5089    Level: developer
5090
5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5092
5093 @*/
5094 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5095 {
5096   Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5097   PetscErrorCode ierr;
5098   PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5099   IS isrowa,iscola;
5100   Mat *aloc;
5101   PetscBool match;
5102
5103   PetscFunctionBegin;
5104   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5105   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5106   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5107   if (!row){
5108     start = A->rmap->rstart; end = A->rmap->rend;
5109     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5110   } else {
5111     isrowa = *row;
5112   }
5113   if (!col){
5114     start = A->cmap->rstart;
5115     cmap = a->garray;
5116     nzA = a->A->cmap->n;
5117     nzB = a->B->cmap->n;
5118     ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5119     ncols = 0;
5120     for (i=0; i<nzB; i++) {
5121       if (cmap[i] < start) idx[ncols++] = cmap[i];
5122       else break;
5123     }
5124     imark = i;
5125     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5126     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5127     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5128   } else {
5129     iscola = *col;
5130   }
5131   if (scall != MAT_INITIAL_MATRIX){
5132     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5133     aloc[0] = *A_loc;
5134   }
5135   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5136   *A_loc = aloc[0];
5137   ierr = PetscFree(aloc);CHKERRQ(ierr);
5138   if (!row){
5139     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5140   }
5141   if (!col){
5142     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5143   }
5144   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5145   PetscFunctionReturn(0);
5146 }
5147
5148 #undef __FUNCT__
5149 #define __FUNCT__ "MatGetBrowsOfAcols"
5150 /*@C
5151     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5152
5153     Collective on Mat
5154
5155    Input Parameters:
5156 +    A,B - the matrices in mpiaij format
5157 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5158 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5159 5160 Output Parameter: 5161 + rowb, colb - index sets of rows and columns of B to extract 5162 - B_seq - the sequential matrix generated 5163 5164 Level: developer 5165 5166 @*/ 5167 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5168 { 5169 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5170 PetscErrorCode ierr; 5171 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5172 IS isrowb,iscolb; 5173 Mat *bseq=PETSC_NULL; 5174 5175 PetscFunctionBegin; 5176 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5177 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5178 } 5179 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5180 5181 if (scall == MAT_INITIAL_MATRIX){ 5182 start = A->cmap->rstart; 5183 cmap = a->garray; 5184 nzA = a->A->cmap->n; 5185 nzB = a->B->cmap->n; 5186 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5187 ncols = 0; 5188 for (i=0; i<nzB; i++) { /* row < local row index */ 5189 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5190 else break; 5191 } 5192 imark = i; 5193 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5194 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5195 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5196 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5197 } else { 5198 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5199 isrowb = *rowb; iscolb = *colb; 5200 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5201 bseq[0] = *B_seq; 5202 } 5203 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5204 *B_seq = bseq[0]; 5205 ierr = PetscFree(bseq);CHKERRQ(ierr); 5206 if (!rowb){ 5207 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5208 } else { 5209 *rowb = isrowb; 5210 } 5211 if (!colb){ 5212 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5213 } else { 5214 *colb = iscolb; 5215 } 5216 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5217 PetscFunctionReturn(0); 5218 } 5219 5220 #undef __FUNCT__ 5221 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5222 /* 5223 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5224 of the OFF-DIAGONAL portion of local A 5225 5226 Collective on Mat 5227 5228 Input Parameters: 5229 + A,B - the matrices in mpiaij format 5230 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5231 5232 Output Parameter: 5233 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5234 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5235 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5236 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5237 5238 Level: developer 5239 5240 */ 5241 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5242 { 5243 VecScatter_MPI_General *gen_to,*gen_from; 5244 PetscErrorCode ierr; 5245 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5246 Mat_SeqAIJ *b_oth; 5247 VecScatter ctx=a->Mvctx; 5248 MPI_Comm comm=((PetscObject)ctx)->comm; 5249 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5250 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5251 PetscScalar *rvalues,*svalues; 5252 MatScalar *b_otha,*bufa,*bufA; 5253 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5254 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5255 MPI_Status *sstatus,rstatus; 5256 PetscMPIInt jj; 5257 PetscInt *cols,sbs,rbs; 5258 PetscScalar *vals; 5259 5260 PetscFunctionBegin; 5261 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5262 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5263 } 5264 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5265 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5266 5267 gen_to = (VecScatter_MPI_General*)ctx->todata; 5268 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5269 rvalues = gen_from->values; /* holds the length of receiving row */ 5270 svalues = gen_to->values; /* holds the length of sending row */ 5271 nrecvs = gen_from->n; 5272 nsends = gen_to->n; 5273 5274 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5275 srow = gen_to->indices; /* local row index to be sent */ 5276 sstarts = gen_to->starts; 5277 sprocs = gen_to->procs; 5278 sstatus = gen_to->sstatus; 5279 sbs = gen_to->bs; 5280 rstarts = gen_from->starts; 5281 rprocs = gen_from->procs; 5282 rbs = gen_from->bs; 5283 5284 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5285 if (scall == MAT_INITIAL_MATRIX){ 5286 /* i-array */ 5287 /*---------*/ 5288 /* post receives */ 5289 for (i=0; i<nrecvs; i++){ 5290 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5291 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5292 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5293 } 5294 5295 /* pack the outgoing message */ 5296 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5297 sstartsj[0] = 0; rstartsj[0] = 0; 5298 len = 0; /* total length of j or a array to be sent */ 5299 k = 0; 5300 for (i=0; i<nsends; i++){ 5301 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5303 for (j=0; j<nrows; j++) { 5304 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5305 for (l=0; l<sbs; l++){ 5306 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5307 rowlen[j*sbs+l] = ncols; 5308 len += ncols; 5309 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5310 } 5311 k++; 5312 } 5313 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5314 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5315 } 5316 /* 
recvs and sends of i-array are completed */ 5317 i = nrecvs; 5318 while (i--) { 5319 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5320 } 5321 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5322 5323 /* allocate buffers for sending j and a arrays */ 5324 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5325 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5326 5327 /* create i-array of B_oth */ 5328 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5329 b_othi[0] = 0; 5330 len = 0; /* total length of j or a array to be received */ 5331 k = 0; 5332 for (i=0; i<nrecvs; i++){ 5333 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5334 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5335 for (j=0; j<nrows; j++) { 5336 b_othi[k+1] = b_othi[k] + rowlen[j]; 5337 len += rowlen[j]; k++; 5338 } 5339 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5340 } 5341 5342 /* allocate space for j and a arrrays of B_oth */ 5343 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5344 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5345 5346 /* j-array */ 5347 /*---------*/ 5348 /* post receives of j-array */ 5349 for (i=0; i<nrecvs; i++){ 5350 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5351 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5352 } 5353 5354 /* pack the outgoing message j-array */ 5355 k = 0; 5356 for (i=0; i<nsends; i++){ 5357 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5358 bufJ = bufj+sstartsj[i]; 5359 for (j=0; j<nrows; j++) { 5360 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5361 for (ll=0; ll<sbs; ll++){ 5362 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5363 for (l=0; l<ncols; l++){ 5364 *bufJ++ = cols[l]; 5365 } 5366 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5367 } 5368 } 5369 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5370 } 5371 5372 /* recvs and sends of j-array are completed */ 5373 i = nrecvs; 5374 while (i--) { 5375 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5376 } 5377 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5378 } else if (scall == MAT_REUSE_MATRIX){ 5379 sstartsj = *startsj_s; 5380 rstartsj = *startsj_r; 5381 bufa = *bufa_ptr; 5382 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5383 b_otha = b_oth->a; 5384 } else { 5385 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5386 } 5387 5388 /* a-array */ 5389 /*---------*/ 5390 /* post receives of a-array */ 5391 for (i=0; i<nrecvs; i++){ 5392 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5393 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5394 } 5395 5396 /* pack the outgoing message a-array */ 5397 k = 0; 5398 for (i=0; i<nsends; i++){ 5399 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5400 bufA = bufa+sstartsj[i]; 5401 for (j=0; j<nrows; j++) { 5402 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5403 for (ll=0; ll<sbs; ll++){ 5404 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5405 for (l=0; l<ncols; l++){ 5406 *bufA++ = vals[l]; 5407 } 5408 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5409 } 5410 } 5411 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5412 } 5413 /* recvs and sends of a-array are completed */ 5414 i = nrecvs; 5415 while (i--) { 5416 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5417 } 5418 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5419 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5420 5421 if (scall == MAT_INITIAL_MATRIX){ 5422 /* put together the new matrix */ 5423 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5424 5425 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5426 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5427 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5428 b_oth->free_a = PETSC_TRUE; 5429 b_oth->free_ij = PETSC_TRUE; 5430 b_oth->nonew = 0; 5431 5432 ierr = PetscFree(bufj);CHKERRQ(ierr); 5433 if (!startsj_s || !bufa_ptr){ 5434 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5435 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5436 } else { 5437 *startsj_s = sstartsj; 5438 *startsj_r = rstartsj; 5439 *bufa_ptr = bufa; 5440 } 5441 } 5442 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5443 PetscFunctionReturn(0); 5444 } 5445 5446 #undef __FUNCT__ 5447 #define __FUNCT__ "MatGetCommunicationStructs" 5448 /*@C 5449 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5450 5451 Not Collective 5452 5453 Input Parameters: 5454 . A - The matrix in mpiaij format 5455 5456 Output Parameter: 5457 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5458 . 
colmap - A map from global column index to local index into lvec 5459 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5460 5461 Level: developer 5462 5463 @*/ 5464 #if defined (PETSC_USE_CTABLE) 5465 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5466 #else 5467 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5468 #endif 5469 { 5470 Mat_MPIAIJ *a; 5471 5472 PetscFunctionBegin; 5473 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5474 PetscValidPointer(lvec, 2); 5475 PetscValidPointer(colmap, 3); 5476 PetscValidPointer(multScatter, 4); 5477 a = (Mat_MPIAIJ *) A->data; 5478 if (lvec) *lvec = a->lvec; 5479 if (colmap) *colmap = a->colmap; 5480 if (multScatter) *multScatter = a->Mvctx; 5481 PetscFunctionReturn(0); 5482 } 5483 5484 EXTERN_C_BEGIN 5485 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5486 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5487 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5488 EXTERN_C_END 5489 5490 #undef __FUNCT__ 5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5492 /* 5493 Computes (B'*A')' since computing B*A directly is untenable 5494 5495 n p p 5496 ( ) ( ) ( ) 5497 m ( A ) * n ( B ) = m ( C ) 5498 ( ) ( ) ( ) 5499 5500 */ 5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5502 { 5503 PetscErrorCode ierr; 5504 Mat At,Bt,Ct; 5505 5506 PetscFunctionBegin; 5507 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5508 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5509 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5510 ierr = MatDestroy(&At);CHKERRQ(ierr); 5511 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5512 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5513 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 PetscInt m=A->rmap->n,n=B->cmap->n; 5523 Mat Cmat; 5524 5525 PetscFunctionBegin; 5526 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5527 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5528 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5529 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5530 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5531 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5532 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 *C = Cmat; 5535 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5536 PetscFunctionReturn(0); 5537 } 5538 5539 /* ----------------------------------------------------------------*/ 5540 #undef __FUNCT__ 5541 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5542 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5543 { 5544 PetscErrorCode ierr; 5545 5546 PetscFunctionBegin; 5547 if (scall == MAT_INITIAL_MATRIX){ 5548 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5549 } 5550 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5551 
PetscFunctionReturn(0); 5552 } 5553 5554 EXTERN_C_BEGIN 5555 #if defined(PETSC_HAVE_MUMPS) 5556 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5557 #endif 5558 #if defined(PETSC_HAVE_PASTIX) 5559 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5560 #endif 5561 #if defined(PETSC_HAVE_SUPERLU_DIST) 5562 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5563 #endif 5564 #if defined(PETSC_HAVE_CLIQUE) 5565 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5566 #endif 5567 EXTERN_C_END 5568 5569 /*MC 5570 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5571 5572 Options Database Keys: 5573 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5574 5575 Level: beginner 5576 5577 .seealso: MatCreateAIJ() 5578 M*/ 5579 5580 EXTERN_C_BEGIN 5581 #undef __FUNCT__ 5582 #define __FUNCT__ "MatCreate_MPIAIJ" 5583 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5584 { 5585 Mat_MPIAIJ *b; 5586 PetscErrorCode ierr; 5587 PetscMPIInt size; 5588 5589 PetscFunctionBegin; 5590 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5591 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5592 B->data = (void*)b; 5593 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5594 B->assembled = PETSC_FALSE; 5595 B->insertmode = NOT_SET_VALUES; 5596 b->size = size; 5597 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5598 5599 /* build cache for off array entries formed */ 5600 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5601 b->donotstash = PETSC_FALSE; 5602 b->colmap = 0; 5603 b->garray = 0; 5604 b->roworiented = PETSC_TRUE; 5605 5606 /* stuff used for matrix vector multiply */ 5607 b->lvec = PETSC_NULL; 5608 b->Mvctx = PETSC_NULL; 5609 5610 /* stuff for MatGetRow() */ 5611 b->rowindices = 0; 5612 b->rowvalues = 0; 5613 b->getrowactive = PETSC_FALSE; 5614 5615 /* flexible pointer used in CUSP/CUSPARSE classes */ 5616 b->spptr = PETSC_NULL; 5617 5618 #if defined(PETSC_HAVE_MUMPS) 5619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5620 "MatGetFactor_aij_mumps", 5621 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5622 #endif 5623 #if defined(PETSC_HAVE_PASTIX) 5624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5625 "MatGetFactor_mpiaij_pastix", 5626 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5627 #endif 5628 #if defined(PETSC_HAVE_SUPERLU_DIST) 5629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5630 "MatGetFactor_mpiaij_superlu_dist", 5631 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5632 #endif 5633 #if defined(PETSC_HAVE_CLIQUE) 5634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5635 "MatGetFactor_aij_clique", 5636 MatGetFactor_aij_clique);CHKERRQ(ierr); 5637 #endif 5638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5639 "MatStoreValues_MPIAIJ", 5640 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5642 "MatRetrieveValues_MPIAIJ", 5643 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5645 "MatGetDiagonalBlock_MPIAIJ", 5646 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5647 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5648                                      "MatIsTranspose_MPIAIJ",
5649                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5651                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5652                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5654                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5655                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5657                                      "MatDiagonalScaleLocal_MPIAIJ",
5658                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5660                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5661                                      MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5663                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5664                                      MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5666                                      "MatConvert_MPIAIJ_MPISBAIJ",
5667                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5669                                      "MatMatMult_MPIDense_MPIAIJ",
5670                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5672                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5673                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5675                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5676                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5677   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5678   PetscFunctionReturn(0);
5679 }
5680 EXTERN_C_END
5681
5682 #undef __FUNCT__
5683 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5684 /*@
5685    MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5686    and "off-diagonal" part of the matrix in CSR format.
5687
5688    Collective on MPI_Comm
5689
5690    Input Parameters:
5691 +  comm - MPI communicator
5692 .  m - number of local rows (Cannot be PETSC_DECIDE)
5693 .  n - This value should be the same as the local size used in creating the
5694        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
5695        calculated if N is given) For square matrices n is almost always m.
5696 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5697 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5698 .  i - row indices for "diagonal" portion of matrix
5699 .  j - column indices
5700 .  a - matrix values
5701 .  oi - row indices for "off-diagonal" portion of matrix
5702 .  oj - column indices
5703 -  oa - matrix values
5704
5705    Output Parameter:
5706 .  mat - the matrix
5707
5708    Level: advanced
5709
5710    Notes:
5711        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5712        must free the arrays once the matrix has been destroyed and not before.
5713
5714        The i and j indices are 0 based
5715
5716        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix
5717
5718        This sets local rows and cannot be used to set off-processor values.
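
       To make the split concrete, here is a small illustrative layout (added for exposition; it is not
       part of the original manual page). Assume two processes, a global 4 by 4 matrix, and that this
       process owns rows 0-1 and columns 0-1, with local entries A(0,0)=1, A(0,1)=2, A(0,3)=3,
       A(1,1)=4, A(1,2)=5. The "diagonal" arrays hold the entries in the owned columns with LOCAL
       column indices, while the "off-diagonal" arrays hold the remaining entries with GLOBAL column
       indices (see the two MatCreateSeqAIJWithArrays() calls in the routine body below):

         i  = {0,2,3}   j  = {0,1,1}   a  = {1.0,2.0,4.0}    (diagonal part, local columns 0..n-1)
         oi = {0,1,2}   oj = {3,2}     oa = {3.0,5.0}        (off-diagonal part, global columns)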
5719 5720 You cannot later use MatSetValues() to change values in this matrix. 5721 5722 .keywords: matrix, aij, compressed row, sparse, parallel 5723 5724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5725 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5726 @*/ 5727 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5728 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5729 { 5730 PetscErrorCode ierr; 5731 Mat_MPIAIJ *maij; 5732 5733 PetscFunctionBegin; 5734 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5735 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5736 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5737 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5738 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5739 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5740 maij = (Mat_MPIAIJ*) (*mat)->data; 5741 maij->donotstash = PETSC_TRUE; 5742 (*mat)->preallocated = PETSC_TRUE; 5743 5744 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5745 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5746 5747 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5748 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5749 5750 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5753 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5754 5755 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5757 PetscFunctionReturn(0); 5758 } 5759 5760 /* 5761 Special version for direct calls from Fortran 5762 */ 5763 #include <petsc-private/fortranimpl.h> 5764 5765 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5766 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5767 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5768 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5769 #endif 5770 5771 /* Change these macros so can be used in void function */ 5772 #undef CHKERRQ 5773 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5774 #undef SETERRQ2 5775 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5776 #undef SETERRQ3 5777 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5778 #undef SETERRQ 5779 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5780 5781 EXTERN_C_BEGIN 5782 #undef __FUNCT__ 5783 #define __FUNCT__ "matsetvaluesmpiaij_" 5784 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5785 { 5786 Mat mat = *mmat; 5787 PetscInt m = *mm, n = *mn; 5788 InsertMode addv = *maddv; 5789 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5790 PetscScalar value; 5791 PetscErrorCode ierr; 5792 5793 MatCheckPreallocated(mat,1); 5794 if (mat->insertmode == NOT_SET_VALUES) { 5795 mat->insertmode = addv; 5796 } 5797 #if defined(PETSC_USE_DEBUG) 5798 else if (mat->insertmode != addv) { 5799 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5800 } 5801 #endif 5802 { 5803 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5804 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5805 PetscBool roworiented = aij->roworiented; 5806 5807 /* Some Variables required in the macro */ 5808 Mat A = aij->A; 5809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5810 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5811 MatScalar *aa = a->a; 5812 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5813 Mat B = aij->B; 5814 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5815 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5816 MatScalar *ba = b->a; 5817 5818 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5819 PetscInt nonew = a->nonew; 5820 MatScalar *ap1,*ap2; 5821 5822 PetscFunctionBegin; 5823 for (i=0; i<m; i++) { 5824 if (im[i] < 0) continue; 5825 #if defined(PETSC_USE_DEBUG) 5826 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5827 #endif 5828 if (im[i] >= rstart && im[i] < rend) { 5829 row = im[i] - rstart; 5830 lastcol1 = -1; 5831 rp1 = aj + ai[row]; 5832 ap1 = aa + ai[row]; 5833 rmax1 = aimax[row]; 5834 nrow1 = ailen[row]; 5835 low1 = 0; 5836 high1 = nrow1; 5837 lastcol2 = -1; 5838 rp2 = bj + bi[row]; 5839 ap2 = ba + bi[row]; 5840 rmax2 = bimax[row]; 5841 nrow2 = bilen[row]; 5842 low2 = 0; 5843 high2 = nrow2; 5844 5845 for (j=0; j<n; j++) { 5846 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5847 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5848 if (in[j] >= cstart && in[j] < cend){ 5849 col = in[j] - cstart; 5850 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5851 } else if (in[j] < 0) continue; 5852 #if defined(PETSC_USE_DEBUG) 5853 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5854 #endif 5855 else { 5856 if (mat->was_assembled) { 5857 if (!aij->colmap) { 5858 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5859 } 5860 #if defined (PETSC_USE_CTABLE) 5861 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5862 col--; 5863 #else 5864 col = aij->colmap[in[j]] - 1; 5865 #endif 5866 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5867 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5868 col = in[j]; 5869 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5870 B = aij->B; 5871 b = (Mat_SeqAIJ*)B->data; 5872 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5873 rp2 = bj + bi[row]; 5874 ap2 = ba + bi[row]; 5875 rmax2 = bimax[row]; 5876 nrow2 = bilen[row]; 5877 low2 = 0; 5878 high2 = nrow2; 5879 bm = aij->B->rmap->n; 5880 ba = b->a; 5881 } 5882 } else col = in[j]; 5883 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5884 } 5885 } 5886 } else { 5887 if (!aij->donotstash) { 5888 if (roworiented) { 5889 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5890 } else { 5891 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5892 } 5893 } 5894 } 5895 }} 5896 PetscFunctionReturnVoid(); 5897 } 5898 EXTERN_C_END 5899 5900
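
/*
   Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() (added for exposition; it is not part of
   the original source file). Every process assembles a sequential AIJ matrix of the SAME global
   dimensions, and the per-process contributions are summed entry-wise into one parallel MPIAIJ
   matrix. The 4x4 size and the tridiagonal values below are made up for the example.

     Mat            seqA,mpiA;
     PetscInt       row,ncols,cols[3];
     PetscScalar    vals[3];
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,3,PETSC_NULL,&seqA);CHKERRQ(ierr);
     for (row=0; row<4; row++) {
       ncols = 0;
       if (row > 0) {cols[ncols] = row-1; vals[ncols] = -1.0; ncols++;}
       cols[ncols] = row; vals[ncols] = 2.0; ncols++;
       if (row < 3) {cols[ncols] = row+1; vals[ncols] = -1.0; ncols++;}
       ierr = MatSetValues(seqA,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,
                                     MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);

   Per the manual page Notes above, seqA is taken over with mpiA, so the caller only calls
   MatDestroy(&mpiA) when finished.
*/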