#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
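
    Row-ownership note (an added explanatory sketch, not part of the original comment): each
    process passes its local row count m through an MPI_Allgather() and a running sum over the
    result builds the ownership ranges used below, roughly

        rowners[0] = 0;   rowners[p+1] = rowners[p] + m_p;     (m_p = local rows of process p)
        process p owns global rows [rowners[p], rowners[p+1])

    which is how process 0 addresses the slice of the SeqAIJ arrays destined for each process.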

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each process
    has an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

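/*
   Illustrative sketch (added note, not used by the build): the MatSetValues_SeqAIJ_A_Private()
   and MatSetValues_SeqAIJ_B_Private() macros above insert a single value into one sorted CSR row.
   A short binary search narrows the window, the entry is updated in place if the column is already
   present, and otherwise the later entries are shifted up one slot to make room.  A stand-alone
   version of the same idea, with hypothetical names and assuming the row has room for one more
   entry, looks roughly like this:

     static void RowInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,PetscBool add)
     {
       PetscInt lo = 0,hi = *nrow,t,i;
       while (hi - lo > 5) {            // binary search on the sorted column indices
         t = (lo + hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] > col) break;      // insertion point found
         if (cols[i] == col) {          // existing entry: add to it or overwrite it
           if (add) vals[i] += value;
           else     vals[i]  = value;
           return;
         }
       }
       for (t=*nrow; t>i; t--) {        // shift the later entries up one slot
         cols[t] = cols[t-1];
         vals[t] = vals[t-1];
       }
       cols[i] = col;  vals[i] = value;  (*nrow)++;
     }

   The real macros additionally honor the ignorezeroentries and nonew options, track the per-row
   lengths in ailen[]/bilen[], and call MatSeqXAIJReallocateAIJ() when the preallocated row is full.
*/
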
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
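              /* explanatory note: the colmap just built maps each global column present in garray to
                 its local off-diagonal column index plus one, so a lookup that yields zero (negative
                 after the "col--" below) means the column is not in the off-diagonal block yet;
                 see MatCreateColmap_MPIAIJ_Private() above */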
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 MPI_Comm comm; 1725 PetscInt first,local_rowsize,local_colsize; 1726 const PetscInt *rows; 1727 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1732 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1733 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1734 /* collect the global row permutation and invert it */ 1735 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1736 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1737 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1738 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1739 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1740 /* get the local target indices */ 1741 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1742 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1743 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1744 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1745 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1746 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1747 /* the column permutation is so much easier; 1748 make a local version of 'colp' and invert it */ 1749 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1750 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1751 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1752 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1753 /* now we just get the submatrix */ 1754 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1755 /* clean up */ 1756 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1757 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1764 { 1765 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1766 Mat A = mat->A,B = 
mat->B; 1767 PetscErrorCode ierr; 1768 PetscReal isend[5],irecv[5]; 1769 1770 PetscFunctionBegin; 1771 info->block_size = 1.0; 1772 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1774 isend[3] = info->memory; isend[4] = info->mallocs; 1775 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1776 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1777 isend[3] += info->memory; isend[4] += info->mallocs; 1778 if (flag == MAT_LOCAL) { 1779 info->nz_used = isend[0]; 1780 info->nz_allocated = isend[1]; 1781 info->nz_unneeded = isend[2]; 1782 info->memory = isend[3]; 1783 info->mallocs = isend[4]; 1784 } else if (flag == MAT_GLOBAL_MAX) { 1785 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } else if (flag == MAT_GLOBAL_SUM) { 1792 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1793 info->nz_used = irecv[0]; 1794 info->nz_allocated = irecv[1]; 1795 info->nz_unneeded = irecv[2]; 1796 info->memory = irecv[3]; 1797 info->mallocs = irecv[4]; 1798 } 1799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1800 info->fill_ratio_needed = 0; 1801 info->factor_mallocs = 0; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSetOption_MPIAIJ" 1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1808 { 1809 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 switch (op) { 1814 case MAT_NEW_NONZERO_LOCATIONS: 1815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1816 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1817 case MAT_KEEP_NONZERO_PATTERN: 1818 case MAT_NEW_NONZERO_LOCATION_ERR: 1819 case MAT_USE_INODES: 1820 case MAT_IGNORE_ZERO_ENTRIES: 1821 MatCheckPreallocated(A,1); 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_ROW_ORIENTED: 1826 a->roworiented = flg; 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_NEW_DIAGONALS: 1831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1832 break; 1833 case MAT_IGNORE_OFF_PROC_ENTRIES: 1834 a->donotstash = flg; 1835 break; 1836 case MAT_SPD: 1837 A->spd_set = PETSC_TRUE; 1838 A->spd = flg; 1839 if (flg) { 1840 A->symmetric = PETSC_TRUE; 1841 A->structurally_symmetric = PETSC_TRUE; 1842 A->symmetric_set = PETSC_TRUE; 1843 A->structurally_symmetric_set = PETSC_TRUE; 1844 } 1845 break; 1846 case MAT_SYMMETRIC: 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_STRUCTURALLY_SYMMETRIC: 1850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1851 break; 1852 case MAT_HERMITIAN: 1853 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1854 break; 1855 case MAT_SYMMETRY_ETERNAL: 1856 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1857 break; 1858 default: 1859 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatGetRow_MPIAIJ" 1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1867 { 1868 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1869 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1870 PetscErrorCode ierr; 1871 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1872 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1873 PetscInt *cmap,*idx_p; 1874 1875 PetscFunctionBegin; 1876 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1877 mat->getrowactive = PETSC_TRUE; 1878 1879 if (!mat->rowvalues && (idx || v)) { 1880 /* 1881 allocate enough space to hold information from the longest row. 1882 */ 1883 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1884 PetscInt max = 1,tmp; 1885 for (i=0; i<matin->rmap->n; i++) { 1886 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1887 if (max < tmp) { max = tmp; } 1888 } 1889 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1890 } 1891 1892 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1893 lrow = row - rstart; 1894 1895 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1896 if (!v) {pvA = 0; pvB = 0;} 1897 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1898 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1899 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1900 nztot = nzA + nzB; 1901 1902 cmap = mat->garray; 1903 if (v || idx) { 1904 if (nztot) { 1905 /* Sort by increasing column numbers, assuming A and B already sorted */ 1906 PetscInt imark = -1; 1907 if (v) { 1908 *v = v_p = mat->rowvalues; 1909 for (i=0; i<nzB; i++) { 1910 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1911 else break; 1912 } 1913 imark = i; 1914 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1915 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1916 } 1917 if (idx) { 1918 *idx = idx_p = mat->rowindices; 1919 if (imark > -1) { 1920 for (i=0; i<imark; i++) { 1921 idx_p[i] = cmap[cworkB[i]]; 1922 } 1923 } else { 1924 for (i=0; i<nzB; i++) { 1925 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1926 else break; 1927 } 1928 imark = i; 1929 } 1930 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1931 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1932 } 1933 } else { 1934 if (idx) *idx = 0; 1935 if (v) *v = 0; 1936 } 1937 } 1938 *nz = nztot; 1939 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1940 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1947 { 1948 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1949 1950 PetscFunctionBegin; 1951 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1952 aij->getrowactive = PETSC_FALSE; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatNorm_MPIAIJ" 1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1959 { 1960 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1961 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1962 PetscErrorCode ierr; 1963 PetscInt i,j,cstart = mat->cmap->rstart; 1964 PetscReal sum = 0.0; 1965 MatScalar *v; 1966 1967 PetscFunctionBegin; 1968 if (aij->size == 1) { 1969 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1970 } 
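  /* In parallel each process accumulates the contribution of its diagonal (A) and
     off-diagonal (B) blocks and the local results are combined with MPI_Allreduce():
     a sum for the Frobenius and 1-norms, a max for the infinity norm. The two norm
     (largest singular value) is not supported and errors out below. */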
else { 1971 if (type == NORM_FROBENIUS) { 1972 v = amat->a; 1973 for (i=0; i<amat->nz; i++) { 1974 #if defined(PETSC_USE_COMPLEX) 1975 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1976 #else 1977 sum += (*v)*(*v); v++; 1978 #endif 1979 } 1980 v = bmat->a; 1981 for (i=0; i<bmat->nz; i++) { 1982 #if defined(PETSC_USE_COMPLEX) 1983 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1984 #else 1985 sum += (*v)*(*v); v++; 1986 #endif 1987 } 1988 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1989 *norm = PetscSqrtReal(*norm); 1990 } else if (type == NORM_1) { /* max column norm */ 1991 PetscReal *tmp,*tmp2; 1992 PetscInt *jj,*garray = aij->garray; 1993 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1994 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1995 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1996 *norm = 0.0; 1997 v = amat->a; jj = amat->j; 1998 for (j=0; j<amat->nz; j++) { 1999 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a; jj = bmat->j; 2002 for (j=0; j<bmat->nz; j++) { 2003 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2004 } 2005 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2006 for (j=0; j<mat->cmap->N; j++) { 2007 if (tmp2[j] > *norm) *norm = tmp2[j]; 2008 } 2009 ierr = PetscFree(tmp);CHKERRQ(ierr); 2010 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2011 } else if (type == NORM_INFINITY) { /* max row norm */ 2012 PetscReal ntemp = 0.0; 2013 for (j=0; j<aij->A->rmap->n; j++) { 2014 v = amat->a + amat->i[j]; 2015 sum = 0.0; 2016 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2017 sum += PetscAbsScalar(*v); v++; 2018 } 2019 v = bmat->a + bmat->i[j]; 2020 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2021 sum += PetscAbsScalar(*v); v++; 2022 } 2023 if (sum > ntemp) ntemp = sum; 2024 } 2025 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2026 } else { 2027 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2028 } 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatTranspose_MPIAIJ" 2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2036 { 2037 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2038 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2039 PetscErrorCode ierr; 2040 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2041 PetscInt cstart=A->cmap->rstart,ncol; 2042 Mat B; 2043 MatScalar *array; 2044 2045 PetscFunctionBegin; 2046 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2047 2048 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2049 ai = Aloc->i; aj = Aloc->j; 2050 bi = Bloc->i; bj = Bloc->j; 2051 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2052 PetscInt *d_nnz,*g_nnz,*o_nnz; 2053 PetscSFNode *oloc; 2054 PetscSF sf; 2055 2056 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2057 /* compute d_nnz for preallocation */ 2058 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2059 for (i=0; i<ai[ma]; i++) { 2060 d_nnz[aj[i]] ++; 2061 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2062 } 2063 /* compute local off-diagonal contributions */ 2064 ierr = 
PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2065 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2066 /* map those to global */ 2067 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2068 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2069 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2070 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2071 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2072 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2073 2074 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2075 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2076 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2077 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2078 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2079 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2080 } else { 2081 B = *matout; 2082 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2083 for (i=0; i<ai[ma]; i++){ 2084 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2085 } 2086 } 2087 2088 /* copy over the A part */ 2089 array = Aloc->a; 2090 row = A->rmap->rstart; 2091 for (i=0; i<ma; i++) { 2092 ncol = ai[i+1]-ai[i]; 2093 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2094 row++; array += ncol; aj += ncol; 2095 } 2096 aj = Aloc->j; 2097 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2098 2099 /* copy over the B part */ 2100 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2101 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2102 array = Bloc->a; 2103 row = A->rmap->rstart; 2104 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2105 cols_tmp = cols; 2106 for (i=0; i<mb; i++) { 2107 ncol = bi[i+1]-bi[i]; 2108 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2109 row++; array += ncol; cols_tmp += ncol; 2110 } 2111 ierr = PetscFree(cols);CHKERRQ(ierr); 2112 2113 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2116 *matout = B; 2117 } else { 2118 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2125 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2126 { 2127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2128 Mat a = aij->A,b = aij->B; 2129 PetscErrorCode ierr; 2130 PetscInt s1,s2,s3; 2131 2132 PetscFunctionBegin; 2133 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2134 if (rr) { 2135 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2136 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2137 /* Overlap communication with computation. 
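       The scatter of rr into aij->lvec is started here; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while the
       messages are in flight, and the scatter is completed only when the received
       values are needed to right-scale the off-diagonal block.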
*/ 2138 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2139 } 2140 if (ll) { 2141 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2142 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2143 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2144 } 2145 /* scale the diagonal block */ 2146 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2147 2148 if (rr) { 2149 /* Do a scatter end and then right scale the off-diagonal block */ 2150 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2151 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2152 } 2153 2154 PetscFunctionReturn(0); 2155 } 2156 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2159 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2160 { 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatEqual_MPIAIJ" 2171 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2172 { 2173 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2174 Mat a,b,c,d; 2175 PetscBool flg; 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 a = matA->A; b = matA->B; 2180 c = matB->A; d = matB->B; 2181 2182 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2183 if (flg) { 2184 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2185 } 2186 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatCopy_MPIAIJ" 2192 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2193 { 2194 PetscErrorCode ierr; 2195 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2196 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2197 2198 PetscFunctionBegin; 2199 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2200 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2201 /* because of the column compression in the off-processor part of the matrix a->B, 2202 the number of columns in a->B and b->B may be different, hence we cannot call 2203 the MatCopy() directly on the two parts. 
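       (MatCopy_Basic() performs a generic row-by-row copy through MatGetRow()/MatSetValues(),
       which is always correct but does not exploit the matching storage of the two matrices.)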
If need be, we can provide a more 2204 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2205 then copying the submatrices */ 2206 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2207 } else { 2208 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2209 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2210 } 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatSetUp_MPIAIJ" 2216 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2217 { 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2227 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2228 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2229 { 2230 PetscInt i,m=Y->rmap->N; 2231 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2232 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2233 const PetscInt *xi = x->i,*yi = y->i; 2234 2235 PetscFunctionBegin; 2236 /* Set the number of nonzeros in the new matrix */ 2237 for (i=0; i<m; i++) { 2238 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2239 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2240 nnz[i] = 0; 2241 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2242 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2243 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2244 nnz[i]++; 2245 } 2246 for (; k<nzy; k++) nnz[i]++; 2247 } 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatAXPY_MPIAIJ" 2253 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2254 { 2255 PetscErrorCode ierr; 2256 PetscInt i; 2257 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2258 PetscBLASInt bnz,one=1; 2259 Mat_SeqAIJ *x,*y; 2260 2261 PetscFunctionBegin; 2262 if (str == SAME_NONZERO_PATTERN) { 2263 PetscScalar alpha = a; 2264 x = (Mat_SeqAIJ *)xx->A->data; 2265 y = (Mat_SeqAIJ *)yy->A->data; 2266 bnz = PetscBLASIntCast(x->nz); 2267 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2268 x = (Mat_SeqAIJ *)xx->B->data; 2269 y = (Mat_SeqAIJ *)yy->B->data; 2270 bnz = PetscBLASIntCast(x->nz); 2271 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2272 } else if (str == SUBSET_NONZERO_PATTERN) { 2273 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2274 2275 x = (Mat_SeqAIJ *)xx->B->data; 2276 y = (Mat_SeqAIJ *)yy->B->data; 2277 if (y->xtoy && y->XtoY != xx->B) { 2278 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2279 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2280 } 2281 if (!y->xtoy) { /* get xtoy */ 2282 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2283 y->XtoY = xx->B; 2284 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2285 } 2286 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2287 } else { 2288 Mat B; 2289 PetscInt *nnz_d,*nnz_o; 2290 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2291 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2292 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2293 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2294 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2295 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2296 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2297 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2298 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2299 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2300 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2301 ierr = MatHeaderReplace(Y,B); 2302 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2303 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2304 } 2305 PetscFunctionReturn(0); 2306 } 2307 2308 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatConjugate_MPIAIJ" 2312 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2313 { 2314 #if defined(PETSC_USE_COMPLEX) 2315 PetscErrorCode ierr; 2316 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2317 2318 PetscFunctionBegin; 2319 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2320 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2321 #else 2322 PetscFunctionBegin; 2323 #endif 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatRealPart_MPIAIJ" 2329 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2330 { 2331 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2336 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2342 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2343 { 2344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2345 PetscErrorCode ierr; 2346 2347 PetscFunctionBegin; 2348 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2349 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2350 PetscFunctionReturn(0); 2351 } 2352 2353 #ifdef PETSC_HAVE_PBGL 2354 2355 #include <boost/parallel/mpi/bsp_process_group.hpp> 2356 #include <boost/graph/distributed/ilu_default_graph.hpp> 2357 #include <boost/graph/distributed/ilu_0_block.hpp> 2358 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2359 #include <boost/graph/distributed/petsc/interface.hpp> 2360 #include <boost/multi_array.hpp> 2361 #include <boost/parallel/distributed_property_map->hpp> 2362 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2365 /* 2366 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2367 */ 2368 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2369 { 2370 namespace petsc = boost::distributed::petsc; 2371 2372 namespace graph_dist = boost::graph::distributed; 2373 using boost::graph::distributed::ilu_default::process_group_type; 2374 using boost::graph::ilu_permuted; 2375 2376 PetscBool row_identity, col_identity; 2377 PetscContainer c; 2378 PetscInt m, n, M, N; 2379 PetscErrorCode ierr; 2380 2381 PetscFunctionBegin; 2382 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2383 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2384 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2385 if (!row_identity || !col_identity) { 2386 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2387 } 2388 2389 process_group_type pg; 2390 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2391 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2392 lgraph_type& level_graph = *lgraph_p; 2393 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2394 2395 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2396 ilu_permuted(level_graph); 2397 2398 /* put together the new matrix */ 2399 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2400 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2401 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2402 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2403 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2404 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2405 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2406 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2407 2408 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2409 ierr = PetscContainerSetPointer(c, lgraph_p); 2410 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2411 ierr = PetscContainerDestroy(&c); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2417 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2418 { 2419 PetscFunctionBegin; 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatSolve_MPIAIJ" 2425 /* 2426 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2427 */ 2428 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2429 { 2430 namespace graph_dist = boost::graph::distributed; 2431 2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2433 lgraph_type* lgraph_p; 2434 PetscContainer c; 2435 PetscErrorCode ierr; 2436 2437 PetscFunctionBegin; 2438 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2439 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2440 ierr = VecCopy(b, x);CHKERRQ(ierr); 2441 2442 PetscScalar* array_x; 2443 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2444 PetscInt sx; 2445 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2446 2447 PetscScalar* array_b; 2448 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2449 PetscInt sb; 2450 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2451 2452 lgraph_type& level_graph = *lgraph_p; 2453 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2454 2455 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2456 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2457 ref_x(array_x, boost::extents[num_vertices(graph)]); 2458 2459 typedef boost::iterator_property_map<array_ref_type::iterator, 2460 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2461 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2462 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2463 2464 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2465 2466 PetscFunctionReturn(0); 2467 } 2468 #endif 2469 2470 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2471 PetscInt nzlocal,nsends,nrecvs; 2472 PetscMPIInt *send_rank,*recv_rank; 2473 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2474 PetscScalar *sbuf_a,**rbuf_a; 2475 PetscErrorCode (*Destroy)(Mat); 2476 } Mat_Redundant; 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2480 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2481 { 2482 PetscErrorCode ierr; 2483 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2484 PetscInt i; 2485 2486 PetscFunctionBegin; 2487 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2488 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2489 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2490 for (i=0; i<redund->nrecvs; i++){ 2491 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2492 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2493 } 2494 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2495 ierr = PetscFree(redund);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatDestroy_MatRedundant" 2501 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2502 { 2503 PetscErrorCode ierr; 2504 PetscContainer container; 2505 Mat_Redundant *redund=PETSC_NULL; 2506 2507 PetscFunctionBegin; 2508 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2509 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2510 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2511 A->ops->destroy = redund->Destroy; 2512 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2513 if (A->ops->destroy) { 2514 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2521 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2522 { 2523 PetscMPIInt rank,size; 2524 MPI_Comm comm=((PetscObject)mat)->comm; 2525 PetscErrorCode ierr; 2526 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2527 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2528 PetscInt *rowrange=mat->rmap->range; 2529 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2530 Mat A=aij->A,B=aij->B,C=*matredundant; 2531 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2532 PetscScalar *sbuf_a; 2533 PetscInt nzlocal=a->nz+b->nz; 2534 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2535 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2536 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2537 MatScalar *aworkA,*aworkB; 2538 PetscScalar *vals; 2539 PetscMPIInt tag1,tag2,tag3,imdex; 2540 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2541 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2542 MPI_Status recv_status,*send_status; 2543 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2544 PetscInt **rbuf_j=PETSC_NULL; 2545 PetscScalar **rbuf_a=PETSC_NULL; 2546 Mat_Redundant *redund=PETSC_NULL; 2547 PetscContainer container; 2548 2549 PetscFunctionBegin; 2550 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2551 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2552 2553 if (reuse == MAT_REUSE_MATRIX) { 2554 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2555 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2556 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2557 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2558 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2559 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2560 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2561 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2562 2563 nsends = redund->nsends; 2564 nrecvs = redund->nrecvs; 2565 send_rank = redund->send_rank; 2566 recv_rank = redund->recv_rank; 2567 sbuf_nz = redund->sbuf_nz; 2568 rbuf_nz = redund->rbuf_nz; 2569 sbuf_j = redund->sbuf_j; 2570 sbuf_a = redund->sbuf_a; 2571 rbuf_j = redund->rbuf_j; 2572 rbuf_a = redund->rbuf_a; 2573 } 2574 2575 if (reuse == MAT_INITIAL_MATRIX){ 2576 PetscMPIInt subrank,subsize; 2577 PetscInt nleftover,np_subcomm; 2578 /* get the destination processors' id send_rank, nsends and nrecvs */ 2579 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2580 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2581 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2582 np_subcomm = size/nsubcomm; 2583 nleftover = size - nsubcomm*np_subcomm; 2584 nsends = 0; nrecvs = 0; 2585 for (i=0; i<size; i++){ /* i=rank*/ 2586 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2587 send_rank[nsends] = i; nsends++; 2588 recv_rank[nrecvs++] = i; 2589 } 2590 } 2591 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2592 i = size-nleftover-1; 2593 j = 0; 2594 while (j < nsubcomm - nleftover){ 2595 send_rank[nsends++] = i; 2596 i--; j++; 2597 } 2598 } 2599 2600 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2601 for (i=0; i<nleftover; i++){ 2602 recv_rank[nrecvs++] = size-nleftover+i; 2603 } 2604 } 2605 2606 /* allocate sbuf_j, sbuf_a */ 2607 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2608 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2609 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2610 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2611 2612 /* copy mat's local entries into the buffers */ 2613 if (reuse == MAT_INITIAL_MATRIX){ 2614 rownz_max = 0; 2615 rptr = sbuf_j; 2616 cols = sbuf_j + rend-rstart + 1; 2617 vals = sbuf_a; 2618 rptr[0] = 0; 2619 for (i=0; i<rend-rstart; i++){ 2620 row = i + rstart; 2621 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2622 ncols = nzA + nzB; 2623 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2624 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2625 /* load the column indices for this row into cols */ 2626 lwrite = 0; 2627 for (l=0; l<nzB; l++) { 2628 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2629 vals[lwrite] = aworkB[l]; 2630 cols[lwrite++] = ctmp; 2631 } 2632 } 2633 for (l=0; l<nzA; l++){ 2634 vals[lwrite] = aworkA[l]; 2635 cols[lwrite++] = cstart + cworkA[l]; 2636 } 2637 for (l=0; l<nzB; l++) { 2638 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2639 vals[lwrite] = aworkB[l]; 2640 cols[lwrite++] = ctmp; 2641 } 2642 } 2643 vals += ncols; 2644 cols += ncols; 2645 rptr[i+1] = rptr[i] + ncols; 2646 if (rownz_max < ncols) rownz_max = ncols; 2647 } 2648 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2649 } else { /* only copy matrix values into sbuf_a */ 2650 rptr = sbuf_j; 2651 vals = sbuf_a; 2652 rptr[0] = 0; 2653 for (i=0; i<rend-rstart; i++){ 2654 row = i + 
rstart; 2655 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2656 ncols = nzA + nzB; 2657 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2658 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2659 lwrite = 0; 2660 for (l=0; l<nzB; l++) { 2661 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2662 } 2663 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2664 for (l=0; l<nzB; l++) { 2665 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2666 } 2667 vals += ncols; 2668 rptr[i+1] = rptr[i] + ncols; 2669 } 2670 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2671 2672 /* send nzlocal to others, and recv other's nzlocal */ 2673 /*--------------------------------------------------*/ 2674 if (reuse == MAT_INITIAL_MATRIX){ 2675 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2676 s_waits2 = s_waits3 + nsends; 2677 s_waits1 = s_waits2 + nsends; 2678 r_waits1 = s_waits1 + nsends; 2679 r_waits2 = r_waits1 + nrecvs; 2680 r_waits3 = r_waits2 + nrecvs; 2681 } else { 2682 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2683 r_waits3 = s_waits3 + nsends; 2684 } 2685 2686 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2687 if (reuse == MAT_INITIAL_MATRIX){ 2688 /* get new tags to keep the communication clean */ 2689 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2690 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2691 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2692 2693 /* post receives of other's nzlocal */ 2694 for (i=0; i<nrecvs; i++){ 2695 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2696 } 2697 /* send nzlocal to others */ 2698 for (i=0; i<nsends; i++){ 2699 sbuf_nz[i] = nzlocal; 2700 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2701 } 2702 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2703 count = nrecvs; 2704 while (count) { 2705 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2706 recv_rank[imdex] = recv_status.MPI_SOURCE; 2707 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2708 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2709 2710 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2711 rbuf_nz[imdex] += i + 2; 2712 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2713 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2714 count--; 2715 } 2716 /* wait on sends of nzlocal */ 2717 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2718 /* send mat->i,j to others, and recv from other's */ 2719 /*------------------------------------------------*/ 2720 for (i=0; i<nsends; i++){ 2721 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2722 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2723 } 2724 /* wait on receives of mat->i,j */ 2725 /*------------------------------*/ 2726 count = nrecvs; 2727 while (count) { 2728 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2729 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2730 count--; 2731 } 2732 /* wait on sends of mat->i,j */ 2733 /*---------------------------*/ 2734 if (nsends) { 2735 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2736 } 2737 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2738 2739 /* post receives, send and receive mat->a */ 2740 /*----------------------------------------*/ 2741 for (imdex=0; imdex<nrecvs; imdex++) { 2742 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2743 } 2744 for (i=0; i<nsends; i++){ 2745 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2746 } 2747 count = nrecvs; 2748 while (count) { 2749 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2750 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2751 count--; 2752 } 2753 if (nsends) { 2754 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2755 } 2756 2757 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2758 2759 /* create redundant matrix */ 2760 /*-------------------------*/ 2761 if (reuse == MAT_INITIAL_MATRIX){ 2762 /* compute rownz_max for preallocation */ 2763 for (imdex=0; imdex<nrecvs; imdex++){ 2764 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2765 rptr = rbuf_j[imdex]; 2766 for (i=0; i<j; i++){ 2767 ncols = rptr[i+1] - rptr[i]; 2768 if (rownz_max < ncols) rownz_max = ncols; 2769 } 2770 } 2771 2772 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2773 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2774 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2775 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2776 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2777 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2778 } else { 2779 C = *matredundant; 2780 } 2781 2782 /* insert local matrix entries */ 2783 rptr = sbuf_j; 2784 cols = sbuf_j + rend-rstart + 1; 2785 vals = sbuf_a; 2786 for (i=0; i<rend-rstart; i++){ 2787 row = i + rstart; 2788 ncols = rptr[i+1] - rptr[i]; 2789 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2790 vals += ncols; 2791 cols += ncols; 2792 } 2793 /* insert received matrix entries */ 2794 for (imdex=0; imdex<nrecvs; imdex++){ 2795 rstart = rowrange[recv_rank[imdex]]; 2796 rend = rowrange[recv_rank[imdex]+1]; 2797 rptr = rbuf_j[imdex]; 2798 cols = rbuf_j[imdex] + rend-rstart + 1; 2799 vals = rbuf_a[imdex]; 2800 for (i=0; i<rend-rstart; i++){ 2801 row = i + rstart; 2802 ncols = rptr[i+1] - rptr[i]; 2803 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2804 vals += ncols; 2805 cols += ncols; 2806 } 2807 } 2808 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2809 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2811 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2812 if (reuse == MAT_INITIAL_MATRIX) { 2813 PetscContainer container; 2814 *matredundant = C; 2815 /* create a supporting struct and attach it to C for reuse */ 2816 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2817 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2818 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2819 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2820 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2821 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2822 2823 redund->nzlocal = nzlocal; 2824 redund->nsends = nsends; 2825 redund->nrecvs = nrecvs; 2826 redund->send_rank = send_rank; 2827 redund->recv_rank = recv_rank; 2828 redund->sbuf_nz = sbuf_nz; 2829 redund->rbuf_nz = rbuf_nz; 2830 redund->sbuf_j = sbuf_j; 2831 redund->sbuf_a = sbuf_a; 2832 redund->rbuf_j = rbuf_j; 2833 redund->rbuf_a = rbuf_a; 2834 2835 redund->Destroy = C->ops->destroy; 2836 C->ops->destroy = MatDestroy_MatRedundant; 2837 } 2838 PetscFunctionReturn(0); 2839 } 2840 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2843 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2844 { 2845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2846 PetscErrorCode ierr; 2847 PetscInt i,*idxb = 0; 2848 PetscScalar *va,*vb; 2849 Vec vtmp; 2850 2851 PetscFunctionBegin; 2852 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2853 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2854 if (idx) { 2855 for (i=0; i<A->rmap->n; i++) { 2856 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2857 } 2858 } 2859 2860 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2861 if (idx) { 2862 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2863 } 2864 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2865 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2866 2867 for (i=0; i<A->rmap->n; i++){ 2868 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2869 va[i] = vb[i]; 2870 if (idx) idx[i] = a->garray[idxb[i]]; 2871 } 2872 } 2873 2874 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2875 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2876 ierr = PetscFree(idxb);CHKERRQ(ierr); 2877 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2878 PetscFunctionReturn(0); 2879 } 2880 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2883 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2884 { 2885 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2886 PetscErrorCode ierr; 2887 PetscInt i,*idxb = 0; 2888 PetscScalar *va,*vb; 2889 Vec vtmp; 2890 2891 PetscFunctionBegin; 2892 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2893 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2894 if (idx) { 2895 for (i=0; i<A->cmap->n; i++) { 2896 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2897 } 2898 } 2899 2900 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2901 if (idx) { 2902 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2903 } 2904 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2905 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2906 2907 for (i=0; i<A->rmap->n; i++){ 2908 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2909 va[i] = vb[i]; 2910 if (idx) idx[i] = a->garray[idxb[i]]; 2911 } 2912 } 2913 2914 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2915 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2916 ierr = PetscFree(idxb);CHKERRQ(ierr); 2917 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2918 PetscFunctionReturn(0); 2919 } 2920 2921 #undef __FUNCT__ 2922 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2923 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2924 { 2925 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2926 PetscInt n = A->rmap->n; 2927 PetscInt cstart = 
A->cmap->rstart; 2928 PetscInt *cmap = mat->garray; 2929 PetscInt *diagIdx, *offdiagIdx; 2930 Vec diagV, offdiagV; 2931 PetscScalar *a, *diagA, *offdiagA; 2932 PetscInt r; 2933 PetscErrorCode ierr; 2934 2935 PetscFunctionBegin; 2936 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2937 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2938 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2939 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2940 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2941 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2942 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2943 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2944 for (r = 0; r < n; ++r) { 2945 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2946 a[r] = diagA[r]; 2947 idx[r] = cstart + diagIdx[r]; 2948 } else { 2949 a[r] = offdiagA[r]; 2950 idx[r] = cmap[offdiagIdx[r]]; 2951 } 2952 } 2953 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2955 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2956 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2957 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2958 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 #undef __FUNCT__ 2963 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2964 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2965 { 2966 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2967 PetscInt n = A->rmap->n; 2968 PetscInt cstart = A->cmap->rstart; 2969 PetscInt *cmap = mat->garray; 2970 PetscInt *diagIdx, *offdiagIdx; 2971 Vec diagV, offdiagV; 2972 PetscScalar *a, *diagA, *offdiagA; 2973 PetscInt r; 2974 PetscErrorCode ierr; 2975 2976 PetscFunctionBegin; 2977 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2979 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2980 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2981 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2982 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2983 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2984 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2985 for (r = 0; r < n; ++r) { 2986 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2987 a[r] = diagA[r]; 2988 idx[r] = cstart + diagIdx[r]; 2989 } else { 2990 a[r] = offdiagA[r]; 2991 idx[r] = cmap[offdiagIdx[r]]; 2992 } 2993 } 2994 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2995 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2996 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2997 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2998 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2999 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3005 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3006 { 3007 PetscErrorCode ierr; 3008 Mat *dummy; 3009 3010 PetscFunctionBegin; 3011 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3012 *newmat = *dummy; 3013 ierr = PetscFree(dummy);CHKERRQ(ierr); 3014 PetscFunctionReturn(0); 3015 } 3016 3017 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3018 3019 #undef __FUNCT__ 
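/* The (point-)block diagonal of a MATMPIAIJ matrix lies entirely inside the local
   diagonal block (the A part), so inverting it can be delegated to the sequential
   routine. A caller retrieves the inverses roughly as follows (a sketch; values
   points at bs*bs blocks stored consecutively, one per local block row):
       const PetscScalar *values;
       ierr = MatInvertBlockDiagonal(A,&values);CHKERRQ(ierr);
*/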
3020 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3021 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3022 { 3023 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3033 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3034 { 3035 PetscErrorCode ierr; 3036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3037 3038 PetscFunctionBegin; 3039 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3040 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3041 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3042 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 /* -------------------------------------------------------------------*/ 3047 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3048 MatGetRow_MPIAIJ, 3049 MatRestoreRow_MPIAIJ, 3050 MatMult_MPIAIJ, 3051 /* 4*/ MatMultAdd_MPIAIJ, 3052 MatMultTranspose_MPIAIJ, 3053 MatMultTransposeAdd_MPIAIJ, 3054 #ifdef PETSC_HAVE_PBGL 3055 MatSolve_MPIAIJ, 3056 #else 3057 0, 3058 #endif 3059 0, 3060 0, 3061 /*10*/ 0, 3062 0, 3063 0, 3064 MatSOR_MPIAIJ, 3065 MatTranspose_MPIAIJ, 3066 /*15*/ MatGetInfo_MPIAIJ, 3067 MatEqual_MPIAIJ, 3068 MatGetDiagonal_MPIAIJ, 3069 MatDiagonalScale_MPIAIJ, 3070 MatNorm_MPIAIJ, 3071 /*20*/ MatAssemblyBegin_MPIAIJ, 3072 MatAssemblyEnd_MPIAIJ, 3073 MatSetOption_MPIAIJ, 3074 MatZeroEntries_MPIAIJ, 3075 /*24*/ MatZeroRows_MPIAIJ, 3076 0, 3077 #ifdef PETSC_HAVE_PBGL 3078 0, 3079 #else 3080 0, 3081 #endif 3082 0, 3083 0, 3084 /*29*/ MatSetUp_MPIAIJ, 3085 #ifdef PETSC_HAVE_PBGL 3086 0, 3087 #else 3088 0, 3089 #endif 3090 0, 3091 0, 3092 0, 3093 /*34*/ MatDuplicate_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*39*/ MatAXPY_MPIAIJ, 3099 MatGetSubMatrices_MPIAIJ, 3100 MatIncreaseOverlap_MPIAIJ, 3101 MatGetValues_MPIAIJ, 3102 MatCopy_MPIAIJ, 3103 /*44*/ MatGetRowMax_MPIAIJ, 3104 MatScale_MPIAIJ, 3105 0, 3106 0, 3107 MatZeroRowsColumns_MPIAIJ, 3108 /*49*/ MatSetRandom_MPIAIJ, 3109 0, 3110 0, 3111 0, 3112 0, 3113 /*54*/ MatFDColoringCreate_MPIAIJ, 3114 0, 3115 MatSetUnfactored_MPIAIJ, 3116 0, /* MatPermute_MPIAIJ, impl currently broken */ 3117 0, 3118 /*59*/ MatGetSubMatrix_MPIAIJ, 3119 MatDestroy_MPIAIJ, 3120 MatView_MPIAIJ, 3121 0, 3122 0, 3123 /*64*/ 0, 3124 0, 3125 0, 3126 0, 3127 0, 3128 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3129 MatGetRowMinAbs_MPIAIJ, 3130 0, 3131 MatSetColoring_MPIAIJ, 3132 #if defined(PETSC_HAVE_ADIC) 3133 MatSetValuesAdic_MPIAIJ, 3134 #else 3135 0, 3136 #endif 3137 MatSetValuesAdifor_MPIAIJ, 3138 /*75*/ MatFDColoringApply_AIJ, 3139 0, 3140 0, 3141 0, 3142 MatFindZeroDiagonals_MPIAIJ, 3143 /*80*/ 0, 3144 0, 3145 0, 3146 /*83*/ MatLoad_MPIAIJ, 3147 0, 3148 0, 3149 0, 3150 0, 3151 0, 3152 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3153 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3154 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3155 MatPtAP_Basic, 3156 MatPtAPSymbolic_MPIAIJ, 3157 /*94*/ MatPtAPNumeric_MPIAIJ, 3158 0, 3159 0, 3160 0, 3161 0, 3162 /*99*/ 0, 3163 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3164 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3165 MatConjugate_MPIAIJ, 3166 0, 3167 /*104*/MatSetValuesRow_MPIAIJ, 3168 MatRealPart_MPIAIJ, 3169 MatImaginaryPart_MPIAIJ, 3170 0, 3171 0, 3172 /*109*/0, 3173 MatGetRedundantMatrix_MPIAIJ, 3174 MatGetRowMin_MPIAIJ, 3175 0, 3176 0, 3177 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3178 0, 3179 0, 3180 0, 3181 0, 
3182 /*119*/0, 3183 0, 3184 0, 3185 0, 3186 MatGetMultiProcBlock_MPIAIJ, 3187 /*124*/MatFindNonzeroRows_MPIAIJ, 3188 MatGetColumnNorms_MPIAIJ, 3189 MatInvertBlockDiagonal_MPIAIJ, 3190 0, 3191 MatGetSubMatricesParallel_MPIAIJ, 3192 /*129*/0, 3193 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3194 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3195 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3196 0, 3197 /*134*/0, 3198 0, 3199 0, 3200 0, 3201 0 3202 }; 3203 3204 /* ----------------------------------------------------------------------------------------*/ 3205 3206 EXTERN_C_BEGIN 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3209 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3210 { 3211 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3216 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 EXTERN_C_END 3220 3221 EXTERN_C_BEGIN 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3224 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3225 { 3226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3231 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3232 PetscFunctionReturn(0); 3233 } 3234 EXTERN_C_END 3235 3236 EXTERN_C_BEGIN 3237 #undef __FUNCT__ 3238 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3239 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3240 { 3241 Mat_MPIAIJ *b; 3242 PetscErrorCode ierr; 3243 PetscInt i; 3244 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3245 3246 PetscFunctionBegin; 3247 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3248 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3249 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3250 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3251 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3252 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3253 3254 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3255 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3256 if (d_nnz) { 3257 for (i=0; i<B->rmap->n; i++) { 3258 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3259 } 3260 } 3261 if (o_nnz) { 3262 for (i=0; i<B->rmap->n; i++) { 3263 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3264 } 3265 } 3266 b = (Mat_MPIAIJ*)B->data; 3267 3268 if (!B->preallocated) { 3269 /* Explicitly create 2 MATSEQAIJ matrices. 
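 The first (b->A) stores the "diagonal" block, i.e. the locally owned rows against the locally owned columns; the second (b->B) stores the off-diagonal block and is created with the full global column width N, which assembly later compacts to just the referenced columns (recorded in garray).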
*/ 3270 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3271 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3272 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3273 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3274 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3275 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3276 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3277 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3278 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3280 } 3281 3282 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3283 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3284 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3285 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3286 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3287 B->preallocated = PETSC_TRUE; 3288 PetscFunctionReturn(0); 3289 } 3290 EXTERN_C_END 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3294 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3295 { 3296 Mat mat; 3297 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3298 PetscErrorCode ierr; 3299 3300 PetscFunctionBegin; 3301 *newmat = 0; 3302 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3303 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3304 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3305 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3306 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3307 a = (Mat_MPIAIJ*)mat->data; 3308 3309 mat->factortype = matin->factortype; 3310 mat->rmap->bs = matin->rmap->bs; 3311 mat->cmap->bs = matin->cmap->bs; 3312 mat->assembled = PETSC_TRUE; 3313 mat->insertmode = NOT_SET_VALUES; 3314 mat->preallocated = PETSC_TRUE; 3315 3316 a->size = oldmat->size; 3317 a->rank = oldmat->rank; 3318 a->donotstash = oldmat->donotstash; 3319 a->roworiented = oldmat->roworiented; 3320 a->rowindices = 0; 3321 a->rowvalues = 0; 3322 a->getrowactive = PETSC_FALSE; 3323 3324 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3325 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3326 3327 if (oldmat->colmap) { 3328 #if defined (PETSC_USE_CTABLE) 3329 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3330 #else 3331 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3332 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3333 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3334 #endif 3335 } else a->colmap = 0; 3336 if (oldmat->garray) { 3337 PetscInt len; 3338 len = oldmat->B->cmap->n; 3339 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3340 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3341 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3342 } else a->garray = 0; 3343 3344 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3345 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3346 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3347 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3348 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3349 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3350 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3351 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3352 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3353 *newmat = mat; 3354 PetscFunctionReturn(0); 3355 } 3356 3357 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatLoad_MPIAIJ" 3361 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3362 { 3363 PetscScalar *vals,*svals; 3364 MPI_Comm comm = ((PetscObject)viewer)->comm; 3365 PetscErrorCode ierr; 3366 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3367 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3368 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3369 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3370 PetscInt cend,cstart,n,*rowners,sizesset=1; 3371 int fd; 3372 PetscInt bs = 1; 3373 3374 PetscFunctionBegin; 3375 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3376 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3377 if (!rank) { 3378 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3379 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3380 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3381 } 3382 3383 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3384 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3385 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3386 3387 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3388 3389 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3390 M = header[1]; N = header[2]; 3391 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3392 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3393 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3394 3395 /* If global sizes are set, check if they are consistent with that given in the file */ 3396 if (sizesset) { 3397 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3398 } 3399 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3400 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3401 3402 /* determine ownership of all (block) rows */ 3403 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3404 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3405 else m = newMat->rmap->n; /* Set by user */ 3406 3407 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3408 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3409 3410 /* First process needs enough room for process with most rows */ 3411 if (!rank) { 3412 mmax = rowners[1]; 3413 for (i=2; i<=size; i++) { 3414 
mmax = PetscMax(mmax, rowners[i]); 3415 } 3416 } else mmax = m; 3417 3418 rowners[0] = 0; 3419 for (i=2; i<=size; i++) { 3420 rowners[i] += rowners[i-1]; 3421 } 3422 rstart = rowners[rank]; 3423 rend = rowners[rank+1]; 3424 3425 /* distribute row lengths to all processors */ 3426 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3427 if (!rank) { 3428 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3429 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3430 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3431 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3432 for (j=0; j<m; j++) { 3433 procsnz[0] += ourlens[j]; 3434 } 3435 for (i=1; i<size; i++) { 3436 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3437 /* calculate the number of nonzeros on each processor */ 3438 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3439 procsnz[i] += rowlengths[j]; 3440 } 3441 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3442 } 3443 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3444 } else { 3445 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3446 } 3447 3448 if (!rank) { 3449 /* determine max buffer needed and allocate it */ 3450 maxnz = 0; 3451 for (i=0; i<size; i++) { 3452 maxnz = PetscMax(maxnz,procsnz[i]); 3453 } 3454 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3455 3456 /* read in my part of the matrix column indices */ 3457 nz = procsnz[0]; 3458 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3459 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3460 3461 /* read in every one elses and ship off */ 3462 for (i=1; i<size; i++) { 3463 nz = procsnz[i]; 3464 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3465 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3466 } 3467 ierr = PetscFree(cols);CHKERRQ(ierr); 3468 } else { 3469 /* determine buffer space needed for message */ 3470 nz = 0; 3471 for (i=0; i<m; i++) { 3472 nz += ourlens[i]; 3473 } 3474 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3475 3476 /* receive message of column indices*/ 3477 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3478 } 3479 3480 /* determine column ownership if matrix is not square */ 3481 if (N != M) { 3482 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3483 else n = newMat->cmap->n; 3484 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3485 cstart = cend - n; 3486 } else { 3487 cstart = rstart; 3488 cend = rend; 3489 n = cend - cstart; 3490 } 3491 3492 /* loop over local rows, determining number of off diagonal entries */ 3493 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3494 jj = 0; 3495 for (i=0; i<m; i++) { 3496 for (j=0; j<ourlens[i]; j++) { 3497 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3498 jj++; 3499 } 3500 } 3501 3502 for (i=0; i<m; i++) { 3503 ourlens[i] -= offlens[i]; 3504 } 3505 if (!sizesset) { 3506 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3507 } 3508 3509 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3510 3511 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3512 3513 for (i=0; i<m; i++) { 3514 ourlens[i] += offlens[i]; 3515 } 3516 3517 if (!rank) { 3518 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3519 3520 /* read in my part of the matrix numerical values */ 
3521 nz = procsnz[0]; 3522 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3523 3524 /* insert into matrix */ 3525 jj = rstart; 3526 smycols = mycols; 3527 svals = vals; 3528 for (i=0; i<m; i++) { 3529 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3530 smycols += ourlens[i]; 3531 svals += ourlens[i]; 3532 jj++; 3533 } 3534 3535 /* read in other processors and ship out */ 3536 for (i=1; i<size; i++) { 3537 nz = procsnz[i]; 3538 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3539 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3540 } 3541 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3542 } else { 3543 /* receive numeric values */ 3544 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3545 3546 /* receive message of values*/ 3547 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3548 3549 /* insert into matrix */ 3550 jj = rstart; 3551 smycols = mycols; 3552 svals = vals; 3553 for (i=0; i<m; i++) { 3554 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3555 smycols += ourlens[i]; 3556 svals += ourlens[i]; 3557 jj++; 3558 } 3559 } 3560 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3561 ierr = PetscFree(vals);CHKERRQ(ierr); 3562 ierr = PetscFree(mycols);CHKERRQ(ierr); 3563 ierr = PetscFree(rowners);CHKERRQ(ierr); 3564 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3565 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3566 3567 PetscFunctionReturn(0); 3568 } 3569 3570 #undef __FUNCT__ 3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3572 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3573 { 3574 PetscErrorCode ierr; 3575 IS iscol_local; 3576 PetscInt csize; 3577 3578 PetscFunctionBegin; 3579 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3580 if (call == MAT_REUSE_MATRIX) { 3581 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3582 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3583 } else { 3584 PetscInt cbs; 3585 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3586 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3587 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3588 } 3589 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3590 if (call == MAT_INITIAL_MATRIX) { 3591 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3592 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3593 } 3594 PetscFunctionReturn(0); 3595 } 3596 3597 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3598 #undef __FUNCT__ 3599 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3600 /* 3601 Not great since it makes two copies of the submatrix, first an SeqAIJ 3602 in local and then by concatenating the local matrices the end result. 3603 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3604 3605 Note: This requires a sequential iscol with all indices. 
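 (MatGetSubMatrix_MPIAIJ() above builds such an index set with ISAllGather() before calling this routine, so every process passes in the complete list of requested global columns.)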
3606 */ 3607 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3608 { 3609 PetscErrorCode ierr; 3610 PetscMPIInt rank,size; 3611 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3612 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3613 PetscBool allcolumns, colflag; 3614 Mat M,Mreuse; 3615 MatScalar *vwork,*aa; 3616 MPI_Comm comm = ((PetscObject)mat)->comm; 3617 Mat_SeqAIJ *aij; 3618 3619 3620 PetscFunctionBegin; 3621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3622 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3623 3624 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3625 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3626 if (colflag && ncol == mat->cmap->N){ 3627 allcolumns = PETSC_TRUE; 3628 } else { 3629 allcolumns = PETSC_FALSE; 3630 } 3631 if (call == MAT_REUSE_MATRIX) { 3632 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3633 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3634 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3635 } else { 3636 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3637 } 3638 3639 /* 3640 m - number of local rows 3641 n - number of columns (same on all processors) 3642 rstart - first row in new global matrix generated 3643 */ 3644 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3645 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3646 if (call == MAT_INITIAL_MATRIX) { 3647 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3648 ii = aij->i; 3649 jj = aij->j; 3650 3651 /* 3652 Determine the number of non-zeros in the diagonal and off-diagonal 3653 portions of the matrix in order to do correct preallocation 3654 */ 3655 3656 /* first get start and end of "diagonal" columns */ 3657 if (csize == PETSC_DECIDE) { 3658 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3659 if (mglobal == n) { /* square matrix */ 3660 nlocal = m; 3661 } else { 3662 nlocal = n/size + ((n % size) > rank); 3663 } 3664 } else { 3665 nlocal = csize; 3666 } 3667 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3668 rstart = rend - nlocal; 3669 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3670 3671 /* next, compute all the lengths */ 3672 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3673 olens = dlens + m; 3674 for (i=0; i<m; i++) { 3675 jend = ii[i+1] - ii[i]; 3676 olen = 0; 3677 dlen = 0; 3678 for (j=0; j<jend; j++) { 3679 if (*jj < rstart || *jj >= rend) olen++; 3680 else dlen++; 3681 jj++; 3682 } 3683 olens[i] = olen; 3684 dlens[i] = dlen; 3685 } 3686 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3687 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3688 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3689 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3690 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3691 ierr = PetscFree(dlens);CHKERRQ(ierr); 3692 } else { 3693 PetscInt ml,nl; 3694 3695 M = *newmat; 3696 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3697 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3698 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3699 /* 3700 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3701 rather than the slower MatSetValues().
3702 */
3703 M->was_assembled = PETSC_TRUE;
3704 M->assembled = PETSC_FALSE;
3705 }
3706 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3707 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3708 ii = aij->i;
3709 jj = aij->j;
3710 aa = aij->a;
3711 for (i=0; i<m; i++) {
3712 row = rstart + i;
3713 nz = ii[i+1] - ii[i];
3714 cwork = jj; jj += nz;
3715 vwork = aa; aa += nz;
3716 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3717 }
3718
3719 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3720 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721 *newmat = M;
3722
3723 /* save the submatrix used on this process for the next reuse request */
3724 if (call == MAT_INITIAL_MATRIX) {
3725 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3726 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3727 }
3728
3729 PetscFunctionReturn(0);
3730 }
3731
3732 EXTERN_C_BEGIN
3733 #undef __FUNCT__
3734 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3735 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3736 {
3737 PetscInt m,cstart,cend,j,nnz,i,d;
3738 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3739 const PetscInt *JJ;
3740 PetscScalar *values;
3741 PetscErrorCode ierr;
3742
3743 PetscFunctionBegin;
3744 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3745
3746 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3747 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3748 m = B->rmap->n;
3749 cstart = B->cmap->rstart;
3750 cend = B->cmap->rend;
3751 rstart = B->rmap->rstart;
3752
3753 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3754
3755 #if defined(PETSC_USE_DEBUG)
3756 for (i=0; i<m; i++) {
3757 nnz = Ii[i+1] - Ii[i];
3758 JJ = J + Ii[i];
3759 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3760 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3761 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3762 }
3763 #endif
3764
3765 for (i=0; i<m; i++) {
3766 nnz = Ii[i+1] - Ii[i];
3767 JJ = J + Ii[i];
3768 nnz_max = PetscMax(nnz_max,nnz);
3769 d = 0;
3770 for (j=0; j<nnz; j++) {
3771 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3772 }
3773 d_nnz[i] = d;
3774 o_nnz[i] = nnz - d;
3775 }
3776 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3777 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3778
3779 if (v) values = (PetscScalar*)v;
3780 else {
3781 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3782 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3783 }
3784
3785 for (i=0; i<m; i++) {
3786 ii = i + rstart;
3787 nnz = Ii[i+1] - Ii[i];
3788 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3789 } 3790 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3791 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3792 3793 if (!v) { 3794 ierr = PetscFree(values);CHKERRQ(ierr); 3795 } 3796 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3797 PetscFunctionReturn(0); 3798 } 3799 EXTERN_C_END 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3803 /*@ 3804 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3805 (the default parallel PETSc format). 3806 3807 Collective on MPI_Comm 3808 3809 Input Parameters: 3810 + B - the matrix 3811 . i - the indices into j for the start of each local row (starts with zero) 3812 . j - the column indices for each local row (starts with zero) 3813 - v - optional values in the matrix 3814 3815 Level: developer 3816 3817 Notes: 3818 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3819 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3820 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3821 3822 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3823 3824 The format which is used for the sparse matrix input, is equivalent to a 3825 row-major ordering.. i.e for the following matrix, the input data expected is 3826 as shown: 3827 3828 1 0 0 3829 2 0 3 P0 3830 ------- 3831 4 5 6 P1 3832 3833 Process0 [P0]: rows_owned=[0,1] 3834 i = {0,1,3} [size = nrow+1 = 2+1] 3835 j = {0,0,2} [size = nz = 6] 3836 v = {1,2,3} [size = nz = 6] 3837 3838 Process1 [P1]: rows_owned=[2] 3839 i = {0,3} [size = nrow+1 = 1+1] 3840 j = {0,1,2} [size = nz = 6] 3841 v = {4,5,6} [size = nz = 6] 3842 3843 .keywords: matrix, aij, compressed row, sparse, parallel 3844 3845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3846 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3847 @*/ 3848 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3849 { 3850 PetscErrorCode ierr; 3851 3852 PetscFunctionBegin; 3853 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3859 /*@C 3860 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3861 (the default parallel PETSc format). For good matrix assembly performance 3862 the user should preallocate the matrix storage by setting the parameters 3863 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3864 performance can be increased by more than a factor of 50. 3865 3866 Collective on MPI_Comm 3867 3868 Input Parameters: 3869 + A - the matrix 3870 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3871 (same value is used for all local rows) 3872 . d_nnz - array containing the number of nonzeros in the various rows of the 3873 DIAGONAL portion of the local submatrix (possibly different for each row) 3874 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3875 The size of this array is equal to the number of local rows, i.e 'm'. 
3876 For matrices that will be factored, you must leave room for (and set) 3877 the diagonal entry even if it is zero. 3878 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3879 submatrix (same value is used for all local rows). 3880 - o_nnz - array containing the number of nonzeros in the various rows of the 3881 OFF-DIAGONAL portion of the local submatrix (possibly different for 3882 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3883 structure. The size of this array is equal to the number 3884 of local rows, i.e 'm'. 3885 3886 If the *_nnz parameter is given then the *_nz parameter is ignored 3887 3888 The AIJ format (also called the Yale sparse matrix format or 3889 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3890 storage. The stored row and column indices begin with zero. 3891 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3892 3893 The parallel matrix is partitioned such that the first m0 rows belong to 3894 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3895 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3896 3897 The DIAGONAL portion of the local submatrix of a processor can be defined 3898 as the submatrix which is obtained by extraction the part corresponding to 3899 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3900 first row that belongs to the processor, r2 is the last row belonging to 3901 the this processor, and c1-c2 is range of indices of the local part of a 3902 vector suitable for applying the matrix to. This is an mxn matrix. In the 3903 common case of a square matrix, the row and column ranges are the same and 3904 the DIAGONAL part is also square. The remaining portion of the local 3905 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3906 3907 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3908 3909 You can call MatGetInfo() to get information on how effective the preallocation was; 3910 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3911 You can also run with the option -info and look for messages with the string 3912 malloc in them to see if additional memory allocation was needed. 3913 3914 Example usage: 3915 3916 Consider the following 8x8 matrix with 34 non-zero values, that is 3917 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3918 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3919 as follows: 3920 3921 .vb 3922 1 2 0 | 0 3 0 | 0 4 3923 Proc0 0 5 6 | 7 0 0 | 8 0 3924 9 0 10 | 11 0 0 | 12 0 3925 ------------------------------------- 3926 13 0 14 | 15 16 17 | 0 0 3927 Proc1 0 18 0 | 19 20 21 | 0 0 3928 0 0 0 | 22 23 0 | 24 0 3929 ------------------------------------- 3930 Proc2 25 26 27 | 0 0 28 | 29 0 3931 30 0 0 | 31 32 33 | 0 34 3932 .ve 3933 3934 This can be represented as a collection of submatrices as: 3935 3936 .vb 3937 A B C 3938 D E F 3939 G H I 3940 .ve 3941 3942 Where the submatrices A,B,C are owned by proc0, D,E,F are 3943 owned by proc1, G,H,I are owned by proc2. 3944 3945 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3946 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3947 The 'M','N' parameters are 8,8, and have the same values on all procs. 3948 3949 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3950 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3951 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3952 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3953 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3954 matrix, ans [DF] as another SeqAIJ matrix. 3955 3956 When d_nz, o_nz parameters are specified, d_nz storage elements are 3957 allocated for every row of the local diagonal submatrix, and o_nz 3958 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3959 One way to choose d_nz and o_nz is to use the max nonzerors per local 3960 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3961 In this case, the values of d_nz,o_nz are: 3962 .vb 3963 proc0 : dnz = 2, o_nz = 2 3964 proc1 : dnz = 3, o_nz = 2 3965 proc2 : dnz = 1, o_nz = 4 3966 .ve 3967 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3968 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3969 for proc3. i.e we are using 12+15+10=37 storage locations to store 3970 34 values. 3971 3972 When d_nnz, o_nnz parameters are specified, the storage is specified 3973 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3974 In the above case the values for d_nnz,o_nnz are: 3975 .vb 3976 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3977 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3978 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3979 .ve 3980 Here the space allocated is sum of all the above values i.e 34, and 3981 hence pre-allocation is perfect. 3982 3983 Level: intermediate 3984 3985 .keywords: matrix, aij, compressed row, sparse, parallel 3986 3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3988 MPIAIJ, MatGetInfo() 3989 @*/ 3990 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3991 { 3992 PetscErrorCode ierr; 3993 3994 PetscFunctionBegin; 3995 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3996 PetscValidType(B,1); 3997 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3998 PetscFunctionReturn(0); 3999 } 4000 4001 #undef __FUNCT__ 4002 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4003 /*@ 4004 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4005 CSR format the local rows. 4006 4007 Collective on MPI_Comm 4008 4009 Input Parameters: 4010 + comm - MPI communicator 4011 . m - number of local rows (Cannot be PETSC_DECIDE) 4012 . n - This value should be the same as the local size used in creating the 4013 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4014 calculated if N is given) For square matrices n is almost always m. 4015 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4016 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4017 . i - row indices 4018 . j - column indices 4019 - a - matrix values 4020 4021 Output Parameter: 4022 . mat - the matrix 4023 4024 Level: intermediate 4025 4026 Notes: 4027 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4028 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4029 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
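 For example, process 0 of the two-process layout shown below could set up its two local rows with a call along these lines (a sketch only; the local array and matrix names are illustrative):
.vb
     PetscInt    i[] = {0,1,3};        /* row pointers for the 2 locally owned rows   */
     PetscInt    j[] = {0,0,2};        /* global column indices of the local nonzeros */
     PetscScalar v[] = {1.0,2.0,3.0};  /* values of the local nonzeros                */
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve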
4030 4031 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4032 4033 The format which is used for the sparse matrix input, is equivalent to a 4034 row-major ordering.. i.e for the following matrix, the input data expected is 4035 as shown: 4036 4037 1 0 0 4038 2 0 3 P0 4039 ------- 4040 4 5 6 P1 4041 4042 Process0 [P0]: rows_owned=[0,1] 4043 i = {0,1,3} [size = nrow+1 = 2+1] 4044 j = {0,0,2} [size = nz = 6] 4045 v = {1,2,3} [size = nz = 6] 4046 4047 Process1 [P1]: rows_owned=[2] 4048 i = {0,3} [size = nrow+1 = 1+1] 4049 j = {0,1,2} [size = nz = 6] 4050 v = {4,5,6} [size = nz = 6] 4051 4052 .keywords: matrix, aij, compressed row, sparse, parallel 4053 4054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4055 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4056 @*/ 4057 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4058 { 4059 PetscErrorCode ierr; 4060 4061 PetscFunctionBegin; 4062 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4063 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4064 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4065 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4066 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4067 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4068 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4069 PetscFunctionReturn(0); 4070 } 4071 4072 #undef __FUNCT__ 4073 #define __FUNCT__ "MatCreateAIJ" 4074 /*@C 4075 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4076 (the default parallel PETSc format). For good matrix assembly performance 4077 the user should preallocate the matrix storage by setting the parameters 4078 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4079 performance can be increased by more than a factor of 50. 4080 4081 Collective on MPI_Comm 4082 4083 Input Parameters: 4084 + comm - MPI communicator 4085 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4086 This value should be the same as the local size used in creating the 4087 y vector for the matrix-vector product y = Ax. 4088 . n - This value should be the same as the local size used in creating the 4089 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4090 calculated if N is given) For square matrices n is almost always m. 4091 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4092 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4093 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4094 (same value is used for all local rows) 4095 . d_nnz - array containing the number of nonzeros in the various rows of the 4096 DIAGONAL portion of the local submatrix (possibly different for each row) 4097 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4098 The size of this array is equal to the number of local rows, i.e 'm'. 4099 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4100 submatrix (same value is used for all local rows). 
4101 - o_nnz - array containing the number of nonzeros in the various rows of the 4102 OFF-DIAGONAL portion of the local submatrix (possibly different for 4103 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4104 structure. The size of this array is equal to the number 4105 of local rows, i.e 'm'. 4106 4107 Output Parameter: 4108 . A - the matrix 4109 4110 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4111 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4112 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4113 4114 Notes: 4115 If the *_nnz parameter is given then the *_nz parameter is ignored 4116 4117 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4118 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4119 storage requirements for this matrix. 4120 4121 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4122 processor than it must be used on all processors that share the object for 4123 that argument. 4124 4125 The user MUST specify either the local or global matrix dimensions 4126 (possibly both). 4127 4128 The parallel matrix is partitioned across processors such that the 4129 first m0 rows belong to process 0, the next m1 rows belong to 4130 process 1, the next m2 rows belong to process 2 etc.. where 4131 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4132 values corresponding to [m x N] submatrix. 4133 4134 The columns are logically partitioned with the n0 columns belonging 4135 to 0th partition, the next n1 columns belonging to the next 4136 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4137 4138 The DIAGONAL portion of the local submatrix on any given processor 4139 is the submatrix corresponding to the rows and columns m,n 4140 corresponding to the given processor. i.e diagonal matrix on 4141 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4142 etc. The remaining portion of the local submatrix [m x (N-n)] 4143 constitute the OFF-DIAGONAL portion. The example below better 4144 illustrates this concept. 4145 4146 For a square global matrix we define each processor's diagonal portion 4147 to be its local rows and the corresponding columns (a square submatrix); 4148 each processor's off-diagonal portion encompasses the remainder of the 4149 local matrix (a rectangular submatrix). 4150 4151 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4152 4153 When calling this routine with a single process communicator, a matrix of 4154 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4155 type of communicator, use the construction mechanism: 4156 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4157 4158 By default, this format uses inodes (identical nodes) when possible. 4159 We search for consecutive rows with the same nonzero structure, thereby 4160 reusing matrix information to achieve increased efficiency. 4161 4162 Options Database Keys: 4163 + -mat_no_inode - Do not use inodes 4164 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4165 - -mat_aij_oneindex - Internally use indexing starting at 1 4166 rather than 0. Note that when calling MatSetValues(), 4167 the user still MUST index entries starting at 0! 4168 4169 4170 Example usage: 4171 4172 Consider the following 8x8 matrix with 34 non-zero values, that is 4173 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows,
4174 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4175 as follows:
4176
4177 .vb
4178 1 2 0 | 0 3 0 | 0 4
4179 Proc0 0 5 6 | 7 0 0 | 8 0
4180 9 0 10 | 11 0 0 | 12 0
4181 -------------------------------------
4182 13 0 14 | 15 16 17 | 0 0
4183 Proc1 0 18 0 | 19 20 21 | 0 0
4184 0 0 0 | 22 23 0 | 24 0
4185 -------------------------------------
4186 Proc2 25 26 27 | 0 0 28 | 29 0
4187 30 0 0 | 31 32 33 | 0 34
4188 .ve
4189
4190 This can be represented as a collection of submatrices as:
4191
4192 .vb
4193 A B C
4194 D E F
4195 G H I
4196 .ve
4197
4198 Where the submatrices A,B,C are owned by proc0, D,E,F are
4199 owned by proc1, G,H,I are owned by proc2.
4200
4201 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4202 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4203 The 'M','N' parameters are 8,8, and have the same values on all procs.
4204
4205 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4206 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4207 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4208 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4209 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4210 matrix and [DF] as another SeqAIJ matrix.
4211
4212 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4213 allocated for every row of the local DIAGONAL submatrix, and o_nz
4214 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4215 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4216 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4217 In this case, the values of d_nz,o_nz are:
4218 .vb
4219 proc0 : d_nz = 2, o_nz = 2
4220 proc1 : d_nz = 3, o_nz = 2
4221 proc2 : d_nz = 1, o_nz = 4
4222 .ve
4223 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4224 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4225 for proc2, i.e. we are using 12+15+10=37 storage locations to store
4226 34 values.
4227
4228 When the d_nnz, o_nnz parameters are specified, the storage is specified
4229 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4230 In the above case the values for d_nnz,o_nnz are:
4231 .vb
4232 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4233 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4234 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4235 .ve
4236 Here the space allocated is the sum of all the above values, i.e. 34, and
4237 hence the preallocation is perfect.
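 A matching per-process call for proc0 of this example could look like the following sketch (the array names are illustrative; proc1 and proc2 would pass their own local sizes and count arrays):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* per-row counts for proc0 from the table above */
     Mat      A;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
 Here d_nz and o_nz are passed as 0 because the d_nnz/o_nnz arrays take precedence whenever they are provided.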
4238 4239 Level: intermediate 4240 4241 .keywords: matrix, aij, compressed row, sparse, parallel 4242 4243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4244 MPIAIJ, MatCreateMPIAIJWithArrays() 4245 @*/ 4246 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4247 { 4248 PetscErrorCode ierr; 4249 PetscMPIInt size; 4250 4251 PetscFunctionBegin; 4252 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4253 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4255 if (size > 1) { 4256 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4257 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4258 } else { 4259 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4260 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4261 } 4262 PetscFunctionReturn(0); 4263 } 4264 4265 #undef __FUNCT__ 4266 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4267 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4268 { 4269 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4270 4271 PetscFunctionBegin; 4272 *Ad = a->A; 4273 *Ao = a->B; 4274 *colmap = a->garray; 4275 PetscFunctionReturn(0); 4276 } 4277 4278 #undef __FUNCT__ 4279 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4280 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4281 { 4282 PetscErrorCode ierr; 4283 PetscInt i; 4284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4285 4286 PetscFunctionBegin; 4287 if (coloring->ctype == IS_COLORING_GLOBAL) { 4288 ISColoringValue *allcolors,*colors; 4289 ISColoring ocoloring; 4290 4291 /* set coloring for diagonal portion */ 4292 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4293 4294 /* set coloring for off-diagonal portion */ 4295 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4296 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4297 for (i=0; i<a->B->cmap->n; i++) { 4298 colors[i] = allcolors[a->garray[i]]; 4299 } 4300 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4301 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4302 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4303 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4304 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4305 ISColoringValue *colors; 4306 PetscInt *larray; 4307 ISColoring ocoloring; 4308 4309 /* set coloring for diagonal portion */ 4310 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4311 for (i=0; i<a->A->cmap->n; i++) { 4312 larray[i] = i + A->cmap->rstart; 4313 } 4314 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4315 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4316 for (i=0; i<a->A->cmap->n; i++) { 4317 colors[i] = coloring->colors[larray[i]]; 4318 } 4319 ierr = PetscFree(larray);CHKERRQ(ierr); 4320 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4321 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4322 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4323 4324 /* set coloring for off-diagonal portion */ 4325 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4326 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4327 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4328 for (i=0; i<a->B->cmap->n; i++) { 4329 colors[i] = coloring->colors[larray[i]]; 4330 } 4331 ierr = PetscFree(larray);CHKERRQ(ierr); 4332 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4333 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4334 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4335 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4336 4337 PetscFunctionReturn(0); 4338 } 4339 4340 #if defined(PETSC_HAVE_ADIC) 4341 #undef __FUNCT__ 4342 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4343 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4344 { 4345 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4346 PetscErrorCode ierr; 4347 4348 PetscFunctionBegin; 4349 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4350 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4351 PetscFunctionReturn(0); 4352 } 4353 #endif 4354 4355 #undef __FUNCT__ 4356 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4357 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4358 { 4359 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4360 PetscErrorCode ierr; 4361 4362 PetscFunctionBegin; 4363 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4364 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4365 PetscFunctionReturn(0); 4366 } 4367 4368 #undef __FUNCT__ 4369 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4370 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4371 { 4372 PetscErrorCode ierr; 4373 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4374 PetscInt *indx; 4375 4376 PetscFunctionBegin; 4377 /* This routine will ONLY return MPIAIJ type matrix */ 4378 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4379 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4380 if (n == PETSC_DECIDE){ 4381 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4382 } 4383 /* Check sum(n) = N */ 4384 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4385 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4386 4387 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4388 rstart -= m; 4389 4390 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4391 for (i=0;i<m;i++) { 4392 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4393 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4394 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4395 } 4396 4397 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4398 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4399 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4400 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4401 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4402 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4408 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4409 { 4410 PetscErrorCode ierr; 4411 PetscInt 
m,N,i,rstart,nnz,Ii; 4412 PetscInt *indx; 4413 PetscScalar *values; 4414 4415 PetscFunctionBegin; 4416 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4417 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4418 for (i=0;i<m;i++) { 4419 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4420 Ii = i + rstart; 4421 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4422 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4423 } 4424 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4425 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4431 /*@ 4432 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4433 matrices from each processor 4434 4435 Collective on MPI_Comm 4436 4437 Input Parameters: 4438 + comm - the communicators the parallel matrix will live on 4439 . inmat - the input sequential matrices 4440 . n - number of local columns (or PETSC_DECIDE) 4441 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4442 4443 Output Parameter: 4444 . outmat - the parallel matrix generated 4445 4446 Level: advanced 4447 4448 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4449 4450 @*/ 4451 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4452 { 4453 PetscErrorCode ierr; 4454 4455 PetscFunctionBegin; 4456 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4457 if (scall == MAT_INITIAL_MATRIX){ 4458 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4459 } 4460 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4461 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4462 PetscFunctionReturn(0); 4463 } 4464 4465 #undef __FUNCT__ 4466 #define __FUNCT__ "MatFileSplit" 4467 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4468 { 4469 PetscErrorCode ierr; 4470 PetscMPIInt rank; 4471 PetscInt m,N,i,rstart,nnz; 4472 size_t len; 4473 const PetscInt *indx; 4474 PetscViewer out; 4475 char *name; 4476 Mat B; 4477 const PetscScalar *values; 4478 4479 PetscFunctionBegin; 4480 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4481 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4482 /* Should this be the type of the diagonal block of A? 
*/ 4483 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4484 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4485 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4486 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4487 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4488 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4489 for (i=0;i<m;i++) { 4490 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4491 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4492 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4493 } 4494 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4495 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4496 4497 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4498 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4499 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4500 sprintf(name,"%s.%d",outfile,rank); 4501 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4502 ierr = PetscFree(name); 4503 ierr = MatView(B,out);CHKERRQ(ierr); 4504 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4505 ierr = MatDestroy(&B);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4512 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4513 { 4514 PetscErrorCode ierr; 4515 Mat_Merge_SeqsToMPI *merge; 4516 PetscContainer container; 4517 4518 PetscFunctionBegin; 4519 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4520 if (container) { 4521 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4522 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4534 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4535 ierr = PetscFree(merge);CHKERRQ(ierr); 4536 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4537 } 4538 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4539 PetscFunctionReturn(0); 4540 } 4541 4542 #include <../src/mat/utils/freespace.h> 4543 #include <petscbt.h> 4544 4545 #undef __FUNCT__ 4546 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4547 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4548 { 4549 PetscErrorCode ierr; 4550 MPI_Comm comm=((PetscObject)mpimat)->comm; 4551 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4552 PetscMPIInt size,rank,taga,*len_s; 4553 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4554 PetscInt proc,m; 4555 PetscInt **buf_ri,**buf_rj; 4556 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4557 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4558 MPI_Request *s_waits,*r_waits; 4559 MPI_Status *status; 4560 MatScalar *aa=a->a; 4561 MatScalar **abuf_r,*ba_i; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4567 4568 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4570 4571 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4572 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4573 4574 bi = merge->bi; 4575 bj = merge->bj; 4576 buf_ri = merge->buf_ri; 4577 buf_rj = merge->buf_rj; 4578 4579 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4580 owners = merge->rowmap->range; 4581 len_s = merge->len_s; 4582 4583 /* send and recv matrix values */ 4584 /*-----------------------------*/ 4585 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4586 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4587 4588 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4589 for (proc=0,k=0; proc<size; proc++){ 4590 if (!len_s[proc]) continue; 4591 i = owners[proc]; 4592 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4593 k++; 4594 } 4595 4596 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4597 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4598 ierr = PetscFree(status);CHKERRQ(ierr); 4599 4600 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4601 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4602 4603 /* insert mat values of mpimat */ 4604 /*----------------------------*/ 4605 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4606 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4607 4608 for (k=0; k<merge->nrecv; k++){ 4609 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4610 nrows = *(buf_ri_k[k]); 4611 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4612 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4613 } 4614 4615 /* set values of ba */ 4616 m = merge->rowmap->n; 4617 for (i=0; i<m; i++) { 4618 arow = owners[rank] + i; 4619 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4620 bnzi = bi[i+1] - bi[i]; 4621 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4622 4623 /* add local non-zero vals of this proc's seqmat into ba */ 4624 anzi = ai[arow+1] - ai[arow]; 4625 aj = a->j + ai[arow]; 4626 aa = a->a + ai[arow]; 4627 nextaj = 0; 4628 for (j=0; nextaj<anzi; j++){ 4629 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4630 ba_i[j] += aa[nextaj++]; 4631 } 4632 } 4633 4634 /* add received vals into ba */ 4635 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4636 /* i-th row */ 4637 if (i == *nextrow[k]) { 4638 anzi = *(nextai[k]+1) - *nextai[k]; 4639 aj = buf_rj[k] + *(nextai[k]); 4640 aa = abuf_r[k] + *(nextai[k]); 4641 nextaj = 0; 4642 for (j=0; nextaj<anzi; j++){ 4643 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4644 ba_i[j] += aa[nextaj++]; 4645 } 4646 } 4647 nextrow[k]++; nextai[k]++; 4648 } 4649 } 4650 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4651 } 4652 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 4655 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4656 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4657 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4658 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4659 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4664 4665 #undef __FUNCT__ 4666 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4667 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4668 { 4669 PetscErrorCode ierr; 4670 Mat B_mpi; 4671 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4672 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4673 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4674 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4675 PetscInt len,proc,*dnz,*onz,bs,cbs; 4676 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4677 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4678 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4679 MPI_Status *status; 4680 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4681 PetscBT lnkbt; 4682 Mat_Merge_SeqsToMPI *merge; 4683 PetscContainer container; 4684 4685 PetscFunctionBegin; 4686 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4687 4688 /* make sure it is a PETSc comm */ 4689 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4692 4693 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4694 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4695 4696 /* determine row ownership */ 4697 /*---------------------------------------------------------*/ 4698 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4699 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4700 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4701 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4702 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4703 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4704 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4705 4706 m = merge->rowmap->n; 4707 M = merge->rowmap->N; 4708 owners = merge->rowmap->range; 4709 4710 /* determine the number of messages to send, their lengths */ 4711 /*---------------------------------------------------------*/ 4712 len_s = merge->len_s; 4713 4714 len = 0; /* length of buf_si[] */ 4715 merge->nsend = 0; 4716 for (proc=0; proc<size; proc++){ 4717 len_si[proc] = 0; 4718 if (proc == rank){ 4719 len_s[proc] = 0; 4720 } else { 4721 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4722 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4723 } 4724 if (len_s[proc]) { 4725 merge->nsend++; 4726 nrows = 0; 4727 for (i=owners[proc]; i<owners[proc+1]; i++){ 4728 if (ai[i+1] > ai[i]) nrows++; 4729 } 4730 len_si[proc] = 2*(nrows+1); 4731 len += len_si[proc]; 4732 } 4733 } 4734 4735 /* determine the number and length of messages to receive for ij-structure */ 4736 /*-------------------------------------------------------------------------*/ 4737 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4738 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4739 4740 /* post the Irecv of j-structure */ 4741 /*-------------------------------*/ 4742 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4743 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4744 4745 /* post the Isend of j-structure */ 4746 /*--------------------------------*/ 4747 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4748 4749 for (proc=0, k=0; proc<size; proc++){ 4750 if (!len_s[proc]) continue; 4751 i = owners[proc]; 4752 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4753 k++; 4754 } 4755 4756 /* receives and sends of j-structure are complete */ 4757 /*------------------------------------------------*/ 4758 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4759 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4760 4761 /* send and recv i-structure */ 4762 /*---------------------------*/ 4763 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4764 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4765 4766 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4767 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4768 for (proc=0,k=0; proc<size; proc++){ 4769 if (!len_s[proc]) continue; 4770 /* form outgoing message for i-structure: 4771 buf_si[0]: nrows to be sent 4772 [1:nrows]: row index (global) 4773 [nrows+1:2*nrows+1]: i-structure index 4774 */ 4775 /*-------------------------------------------*/ 4776 nrows = len_si[proc]/2 - 1; 4777 buf_si_i = buf_si + nrows+1; 4778 buf_si[0] = nrows; 4779 buf_si_i[0] = 0; 4780 nrows = 0; 4781 for (i=owners[proc]; i<owners[proc+1]; i++){ 4782 anzi = ai[i+1] - ai[i]; 4783 if (anzi) { 4784 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4785 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4786 nrows++; 4787 } 4788 } 4789 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4790 k++; 4791 buf_si += len_si[proc]; 4792 } 4793 4794 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4795 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4796 4797 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4798 for (i=0; i<merge->nrecv; i++){ 4799 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4800 } 4801 4802 ierr = PetscFree(len_si);CHKERRQ(ierr); 4803 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4804 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4805 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4806 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4807 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4808 ierr = PetscFree(status);CHKERRQ(ierr); 4809 4810 /* compute a local seq matrix in each processor */ 4811 /*----------------------------------------------*/ 4812 /* allocate bi array and free space for accumulating nonzero column info */ 4813 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4814 bi[0] = 0; 4815 4816 /* create and initialize a linked list */ 4817 nlnk = N+1; 4818 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4819 4820 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4821 len = 0; 4822 len = ai[owners[rank+1]] - ai[owners[rank]]; 4823 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4824 current_space = free_space; 4825 4826 /* determine 
symbolic info for each local row */ 4827 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4828 4829 for (k=0; k<merge->nrecv; k++){ 4830 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4831 nrows = *buf_ri_k[k]; 4832 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4833 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4834 } 4835 4836 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4837 len = 0; 4838 for (i=0;i<m;i++) { 4839 bnzi = 0; 4840 /* add local non-zero cols of this proc's seqmat into lnk */ 4841 arow = owners[rank] + i; 4842 anzi = ai[arow+1] - ai[arow]; 4843 aj = a->j + ai[arow]; 4844 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4845 bnzi += nlnk; 4846 /* add received col data into lnk */ 4847 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4848 if (i == *nextrow[k]) { /* i-th row */ 4849 anzi = *(nextai[k]+1) - *nextai[k]; 4850 aj = buf_rj[k] + *nextai[k]; 4851 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4852 bnzi += nlnk; 4853 nextrow[k]++; nextai[k]++; 4854 } 4855 } 4856 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4857 4858 /* if free space is not available, make more free space */ 4859 if (current_space->local_remaining<bnzi) { 4860 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4861 nspacedouble++; 4862 } 4863 /* copy data into free space, then initialize lnk */ 4864 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4865 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4866 4867 current_space->array += bnzi; 4868 current_space->local_used += bnzi; 4869 current_space->local_remaining -= bnzi; 4870 4871 bi[i+1] = bi[i] + bnzi; 4872 } 4873 4874 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4875 4876 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4877 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4878 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4879 4880 /* create symbolic parallel matrix B_mpi */ 4881 /*---------------------------------------*/ 4882 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4883 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4884 if (n==PETSC_DECIDE) { 4885 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4886 } else { 4887 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4888 } 4889 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4890 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4891 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4892 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4893 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4894 4895 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4896 B_mpi->assembled = PETSC_FALSE; 4897 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4898 merge->bi = bi; 4899 merge->bj = bj; 4900 merge->buf_ri = buf_ri; 4901 merge->buf_rj = buf_rj; 4902 merge->coi = PETSC_NULL; 4903 merge->coj = PETSC_NULL; 4904 merge->owners_co = PETSC_NULL; 4905 4906 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4907 4908 /* attach the supporting struct to B_mpi for reuse */ 4909 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4910 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4911 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4912 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4913 *mpimat = B_mpi;
4914
4915 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4916 PetscFunctionReturn(0);
4917 }
4918
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4921 /*@C
4922 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4923 matrices from each processor
4924
4925 Collective on MPI_Comm
4926
4927 Input Parameters:
4928 + comm - the communicator the parallel matrix will live on
4929 . seqmat - the input sequential matrix (one per process)
4930 . m - number of local rows (or PETSC_DECIDE)
4931 . n - number of local columns (or PETSC_DECIDE)
4932 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4933
4934 Output Parameter:
4935 . mpimat - the parallel matrix generated
4936
4937 Level: advanced
4938
4939 Notes:
4940 The dimensions of the sequential matrix in each processor MUST be the same.
4941 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4942 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4943 @*/
4944 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4945 {
4946 PetscErrorCode ierr;
4947 PetscMPIInt size;
4948
4949 PetscFunctionBegin;
4950 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4951 if (size == 1){
4952 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4953 if (scall == MAT_INITIAL_MATRIX){
4954 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4955 } else {
4956 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4957 }
4958 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959 PetscFunctionReturn(0);
4960 }
4961 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4962 if (scall == MAT_INITIAL_MATRIX){
4963 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4964 }
4965 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4966 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967 PetscFunctionReturn(0);
4968 }
4969
4970 #undef __FUNCT__
4971 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4972 /*@
4973 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4974 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4975 with MatGetSize().
4976
4977 Not Collective
4978
4979 Input Parameters:
4980 + A - the matrix
4981 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4982
4983 Output Parameter:
4984 .
A_loc - the local sequential matrix generated 4985 4986 Level: developer 4987 4988 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4989 4990 @*/ 4991 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4992 { 4993 PetscErrorCode ierr; 4994 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4995 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4996 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4997 MatScalar *aa=a->a,*ba=b->a,*cam; 4998 PetscScalar *ca; 4999 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5000 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5001 PetscBool match; 5002 5003 PetscFunctionBegin; 5004 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5005 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5006 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5007 if (scall == MAT_INITIAL_MATRIX){ 5008 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5009 ci[0] = 0; 5010 for (i=0; i<am; i++){ 5011 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5012 } 5013 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5014 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5015 k = 0; 5016 for (i=0; i<am; i++) { 5017 ncols_o = bi[i+1] - bi[i]; 5018 ncols_d = ai[i+1] - ai[i]; 5019 /* off-diagonal portion of A */ 5020 for (jo=0; jo<ncols_o; jo++) { 5021 col = cmap[*bj]; 5022 if (col >= cstart) break; 5023 cj[k] = col; bj++; 5024 ca[k++] = *ba++; 5025 } 5026 /* diagonal portion of A */ 5027 for (j=0; j<ncols_d; j++) { 5028 cj[k] = cstart + *aj++; 5029 ca[k++] = *aa++; 5030 } 5031 /* off-diagonal portion of A */ 5032 for (j=jo; j<ncols_o; j++) { 5033 cj[k] = cmap[*bj++]; 5034 ca[k++] = *ba++; 5035 } 5036 } 5037 /* put together the new matrix */ 5038 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5039 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5040 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5041 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5042 mat->free_a = PETSC_TRUE; 5043 mat->free_ij = PETSC_TRUE; 5044 mat->nonew = 0; 5045 } else if (scall == MAT_REUSE_MATRIX){ 5046 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5047 ci = mat->i; cj = mat->j; cam = mat->a; 5048 for (i=0; i<am; i++) { 5049 /* off-diagonal portion of A */ 5050 ncols_o = bi[i+1] - bi[i]; 5051 for (jo=0; jo<ncols_o; jo++) { 5052 col = cmap[*bj]; 5053 if (col >= cstart) break; 5054 *cam++ = *ba++; bj++; 5055 } 5056 /* diagonal portion of A */ 5057 ncols_d = ai[i+1] - ai[i]; 5058 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5059 /* off-diagonal portion of A */ 5060 for (j=jo; j<ncols_o; j++) { 5061 *cam++ = *ba++; bj++; 5062 } 5063 } 5064 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5065 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5066 PetscFunctionReturn(0); 5067 } 5068 5069 #undef __FUNCT__ 5070 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5071 /*@C 5072 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5073 5074 Not Collective 5075 5076 Input Parameters: 5077 + A - the matrix 5078 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5079 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5080 5081 Output Parameter: 5082 . 
A_loc - the local sequential matrix generated
5083
5084 Level: developer
5085
5086 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5087
5088 @*/
5089 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5090 {
5091 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5092 PetscErrorCode ierr;
5093 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5094 IS isrowa,iscola;
5095 Mat *aloc;
5096 PetscBool match;
5097
5098 PetscFunctionBegin;
5099 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5100 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5101 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5102 if (!row){
5103 start = A->rmap->rstart; end = A->rmap->rend;
5104 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5105 } else {
5106 isrowa = *row;
5107 }
5108 if (!col){
5109 start = A->cmap->rstart;
5110 cmap = a->garray;
5111 nzA = a->A->cmap->n;
5112 nzB = a->B->cmap->n;
5113 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5114 ncols = 0;
5115 for (i=0; i<nzB; i++) {
5116 if (cmap[i] < start) idx[ncols++] = cmap[i];
5117 else break;
5118 }
5119 imark = i;
5120 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5121 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5122 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5123 } else {
5124 iscola = *col;
5125 }
5126 if (scall != MAT_INITIAL_MATRIX){
5127 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5128 aloc[0] = *A_loc;
5129 }
5130 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5131 *A_loc = aloc[0];
5132 ierr = PetscFree(aloc);CHKERRQ(ierr);
5133 if (!row){
5134 ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5135 }
5136 if (!col){
5137 ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5138 }
5139 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5140 PetscFunctionReturn(0);
5141 }
5142
5143 #undef __FUNCT__
5144 #define __FUNCT__ "MatGetBrowsOfAcols"
5145 /*@C
5146 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5147
5148 Collective on Mat
5149
5150 Input Parameters:
5151 + A,B - the matrices in mpiaij format
5152 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5153 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5154
5155 Output Parameter:
5156 + rowb, colb - index sets of rows and columns of B to extract
5157 - B_seq - the sequential matrix generated
5158
5159 Level: developer
5160
5161 @*/
5162 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5163 {
5164 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5165 PetscErrorCode ierr;
5166 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5167 IS isrowb,iscolb;
5168 Mat *bseq=PETSC_NULL;
5169
5170 PetscFunctionBegin;
5171 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5172 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5173 }
5174 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5175
5176 if (scall == MAT_INITIAL_MATRIX){
5177 start = A->cmap->rstart;
5178 cmap = a->garray;
5179 nzA = a->A->cmap->n;
5180 nzB = a->B->cmap->n;
5181 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5182 ncols = 0;
5183 for (i=0; i<nzB; i++) { /* row < local row index */
5184 if (cmap[i] < start) idx[ncols++] = cmap[i];
5185 else break;
5186 }
5187 imark = i;
5188 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5189 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5190 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5191 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5192 } else {
5193 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5194 isrowb = *rowb; iscolb = *colb;
5195 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5196 bseq[0] = *B_seq;
5197 }
5198 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5199 *B_seq = bseq[0];
5200 ierr = PetscFree(bseq);CHKERRQ(ierr);
5201 if (!rowb){
5202 ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5203 } else {
5204 *rowb = isrowb;
5205 }
5206 if (!colb){
5207 ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5208 } else {
5209 *colb = iscolb;
5210 }
5211 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5212 PetscFunctionReturn(0);
5213 }
5214
5215 #undef __FUNCT__
5216 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5217 /*
5218 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5219 of the OFF-DIAGONAL portion of local A
5220
5221 Collective on Mat
5222
5223 Input Parameters:
5224 + A,B - the matrices in mpiaij format
5225 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5226
5227 Output Parameter:
5228 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5229 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5230 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5231 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5232 5233 Level: developer 5234 5235 */ 5236 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5237 { 5238 VecScatter_MPI_General *gen_to,*gen_from; 5239 PetscErrorCode ierr; 5240 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5241 Mat_SeqAIJ *b_oth; 5242 VecScatter ctx=a->Mvctx; 5243 MPI_Comm comm=((PetscObject)ctx)->comm; 5244 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5245 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5246 PetscScalar *rvalues,*svalues; 5247 MatScalar *b_otha,*bufa,*bufA; 5248 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5249 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5250 MPI_Status *sstatus,rstatus; 5251 PetscMPIInt jj; 5252 PetscInt *cols,sbs,rbs; 5253 PetscScalar *vals; 5254 5255 PetscFunctionBegin; 5256 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5257 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5258 } 5259 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5260 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5261 5262 gen_to = (VecScatter_MPI_General*)ctx->todata; 5263 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5264 rvalues = gen_from->values; /* holds the length of receiving row */ 5265 svalues = gen_to->values; /* holds the length of sending row */ 5266 nrecvs = gen_from->n; 5267 nsends = gen_to->n; 5268 5269 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5270 srow = gen_to->indices; /* local row index to be sent */ 5271 sstarts = gen_to->starts; 5272 sprocs = gen_to->procs; 5273 sstatus = gen_to->sstatus; 5274 sbs = gen_to->bs; 5275 rstarts = gen_from->starts; 5276 rprocs = gen_from->procs; 5277 rbs = gen_from->bs; 5278 5279 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5280 if (scall == MAT_INITIAL_MATRIX){ 5281 /* i-array */ 5282 /*---------*/ 5283 /* post receives */ 5284 for (i=0; i<nrecvs; i++){ 5285 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5286 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5287 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5288 } 5289 5290 /* pack the outgoing message */ 5291 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5292 sstartsj[0] = 0; rstartsj[0] = 0; 5293 len = 0; /* total length of j or a array to be sent */ 5294 k = 0; 5295 for (i=0; i<nsends; i++){ 5296 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5297 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5298 for (j=0; j<nrows; j++) { 5299 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5300 for (l=0; l<sbs; l++){ 5301 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5302 rowlen[j*sbs+l] = ncols; 5303 len += ncols; 5304 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5305 } 5306 k++; 5307 } 5308 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5309 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5310 } 5311 /* 
recvs and sends of i-array are completed */ 5312 i = nrecvs; 5313 while (i--) { 5314 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5315 } 5316 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5317 5318 /* allocate buffers for sending j and a arrays */ 5319 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5320 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5321 5322 /* create i-array of B_oth */ 5323 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5324 b_othi[0] = 0; 5325 len = 0; /* total length of j or a array to be received */ 5326 k = 0; 5327 for (i=0; i<nrecvs; i++){ 5328 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5329 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5330 for (j=0; j<nrows; j++) { 5331 b_othi[k+1] = b_othi[k] + rowlen[j]; 5332 len += rowlen[j]; k++; 5333 } 5334 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5335 } 5336 5337 /* allocate space for j and a arrrays of B_oth */ 5338 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5339 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5340 5341 /* j-array */ 5342 /*---------*/ 5343 /* post receives of j-array */ 5344 for (i=0; i<nrecvs; i++){ 5345 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5346 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5347 } 5348 5349 /* pack the outgoing message j-array */ 5350 k = 0; 5351 for (i=0; i<nsends; i++){ 5352 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5353 bufJ = bufj+sstartsj[i]; 5354 for (j=0; j<nrows; j++) { 5355 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5356 for (ll=0; ll<sbs; ll++){ 5357 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5358 for (l=0; l<ncols; l++){ 5359 *bufJ++ = cols[l]; 5360 } 5361 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5362 } 5363 } 5364 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5365 } 5366 5367 /* recvs and sends of j-array are completed */ 5368 i = nrecvs; 5369 while (i--) { 5370 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5371 } 5372 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5373 } else if (scall == MAT_REUSE_MATRIX){ 5374 sstartsj = *startsj_s; 5375 rstartsj = *startsj_r; 5376 bufa = *bufa_ptr; 5377 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5378 b_otha = b_oth->a; 5379 } else { 5380 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5381 } 5382 5383 /* a-array */ 5384 /*---------*/ 5385 /* post receives of a-array */ 5386 for (i=0; i<nrecvs; i++){ 5387 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5388 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5389 } 5390 5391 /* pack the outgoing message a-array */ 5392 k = 0; 5393 for (i=0; i<nsends; i++){ 5394 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5395 bufA = bufa+sstartsj[i]; 5396 for (j=0; j<nrows; j++) { 5397 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5398 for (ll=0; ll<sbs; ll++){ 5399 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5400 for (l=0; l<ncols; l++){ 5401 *bufA++ = vals[l]; 5402 } 5403 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5404 } 5405 } 5406 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5407 } 5408 /* recvs and sends of a-array are completed */ 5409 i = nrecvs; 5410 while (i--) { 5411 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5412 } 5413 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5414 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5415 5416 if (scall == MAT_INITIAL_MATRIX){ 5417 /* put together the new matrix */ 5418 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5419 5420 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5421 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5422 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5423 b_oth->free_a = PETSC_TRUE; 5424 b_oth->free_ij = PETSC_TRUE; 5425 b_oth->nonew = 0; 5426 5427 ierr = PetscFree(bufj);CHKERRQ(ierr); 5428 if (!startsj_s || !bufa_ptr){ 5429 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5430 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5431 } else { 5432 *startsj_s = sstartsj; 5433 *startsj_r = rstartsj; 5434 *bufa_ptr = bufa; 5435 } 5436 } 5437 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5438 PetscFunctionReturn(0); 5439 } 5440 5441 #undef __FUNCT__ 5442 #define __FUNCT__ "MatGetCommunicationStructs" 5443 /*@C 5444 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5445 5446 Not Collective 5447 5448 Input Parameters: 5449 . A - The matrix in mpiaij format 5450 5451 Output Parameter: 5452 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5453 . 
colmap - A map from global column index to local index into lvec 5454 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5455 5456 Level: developer 5457 5458 @*/ 5459 #if defined (PETSC_USE_CTABLE) 5460 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5461 #else 5462 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5463 #endif 5464 { 5465 Mat_MPIAIJ *a; 5466 5467 PetscFunctionBegin; 5468 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5469 PetscValidPointer(lvec, 2); 5470 PetscValidPointer(colmap, 3); 5471 PetscValidPointer(multScatter, 4); 5472 a = (Mat_MPIAIJ *) A->data; 5473 if (lvec) *lvec = a->lvec; 5474 if (colmap) *colmap = a->colmap; 5475 if (multScatter) *multScatter = a->Mvctx; 5476 PetscFunctionReturn(0); 5477 } 5478 5479 EXTERN_C_BEGIN 5480 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5481 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5482 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5483 EXTERN_C_END 5484 5485 #undef __FUNCT__ 5486 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5487 /* 5488 Computes (B'*A')' since computing B*A directly is untenable 5489 5490 n p p 5491 ( ) ( ) ( ) 5492 m ( A ) * n ( B ) = m ( C ) 5493 ( ) ( ) ( ) 5494 5495 */ 5496 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5497 { 5498 PetscErrorCode ierr; 5499 Mat At,Bt,Ct; 5500 5501 PetscFunctionBegin; 5502 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5503 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5504 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5505 ierr = MatDestroy(&At);CHKERRQ(ierr); 5506 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5507 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5508 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5509 PetscFunctionReturn(0); 5510 } 5511 5512 #undef __FUNCT__ 5513 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5514 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5515 { 5516 PetscErrorCode ierr; 5517 PetscInt m=A->rmap->n,n=B->cmap->n; 5518 Mat Cmat; 5519 5520 PetscFunctionBegin; 5521 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5522 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5523 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5524 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5525 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5526 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5527 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5528 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5529 *C = Cmat; 5530 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5531 PetscFunctionReturn(0); 5532 } 5533 5534 /* ----------------------------------------------------------------*/ 5535 #undef __FUNCT__ 5536 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5537 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5538 { 5539 PetscErrorCode ierr; 5540 5541 PetscFunctionBegin; 5542 if (scall == MAT_INITIAL_MATRIX){ 5543 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5544 } 5545 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5546 
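/*
   This routine follows the usual PETSc symbolic/numeric split: the symbolic phase above creates the
   MPIDense product matrix, and the numeric phase fills it by forming (B'*A')' with explicit
   transposes, as described before MatMatMultNumeric_MPIDense_MPIAIJ().

   A minimal usage sketch (assuming A is MATMPIDENSE, B is MATMPIAIJ and their sizes are compatible;
   the variable names are illustrative only):

      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/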
PetscFunctionReturn(0); 5547 } 5548 5549 EXTERN_C_BEGIN 5550 #if defined(PETSC_HAVE_MUMPS) 5551 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5552 #endif 5553 #if defined(PETSC_HAVE_PASTIX) 5554 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5555 #endif 5556 #if defined(PETSC_HAVE_SUPERLU_DIST) 5557 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5558 #endif 5559 #if defined(PETSC_HAVE_CLIQUE) 5560 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5561 #endif 5562 EXTERN_C_END 5563 5564 /*MC 5565 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5566 5567 Options Database Keys: 5568 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5569 5570 Level: beginner 5571 5572 .seealso: MatCreateAIJ() 5573 M*/ 5574 5575 EXTERN_C_BEGIN 5576 #undef __FUNCT__ 5577 #define __FUNCT__ "MatCreate_MPIAIJ" 5578 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5579 { 5580 Mat_MPIAIJ *b; 5581 PetscErrorCode ierr; 5582 PetscMPIInt size; 5583 5584 PetscFunctionBegin; 5585 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5586 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5587 B->data = (void*)b; 5588 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5589 B->assembled = PETSC_FALSE; 5590 B->insertmode = NOT_SET_VALUES; 5591 b->size = size; 5592 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5593 5594 /* build cache for off array entries formed */ 5595 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5596 b->donotstash = PETSC_FALSE; 5597 b->colmap = 0; 5598 b->garray = 0; 5599 b->roworiented = PETSC_TRUE; 5600 5601 /* stuff used for matrix vector multiply */ 5602 b->lvec = PETSC_NULL; 5603 b->Mvctx = PETSC_NULL; 5604 5605 /* stuff for MatGetRow() */ 5606 b->rowindices = 0; 5607 b->rowvalues = 0; 5608 b->getrowactive = PETSC_FALSE; 5609 5610 /* flexible pointer used in CUSP/CUSPARSE classes */ 5611 b->spptr = PETSC_NULL; 5612 5613 #if defined(PETSC_HAVE_MUMPS) 5614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5615 "MatGetFactor_aij_mumps", 5616 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5617 #endif 5618 #if defined(PETSC_HAVE_PASTIX) 5619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5620 "MatGetFactor_mpiaij_pastix", 5621 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5622 #endif 5623 #if defined(PETSC_HAVE_SUPERLU_DIST) 5624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5625 "MatGetFactor_mpiaij_superlu_dist", 5626 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5627 #endif 5628 #if defined(PETSC_HAVE_CLIQUE) 5629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5630 "MatGetFactor_aij_clique", 5631 MatGetFactor_aij_clique);CHKERRQ(ierr); 5632 #endif 5633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5634 "MatStoreValues_MPIAIJ", 5635 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5637 "MatRetrieveValues_MPIAIJ", 5638 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5640 "MatGetDiagonalBlock_MPIAIJ", 5641 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5642 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5643 "MatIsTranspose_MPIAIJ", 5644 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5646 "MatMPIAIJSetPreallocation_MPIAIJ", 5647 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5649 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5650 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5652 "MatDiagonalScaleLocal_MPIAIJ", 5653 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5655 "MatConvert_MPIAIJ_MPIAIJPERM", 5656 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5658 "MatConvert_MPIAIJ_MPIAIJCRL", 5659 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5661 "MatConvert_MPIAIJ_MPISBAIJ", 5662 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5664 "MatMatMult_MPIDense_MPIAIJ", 5665 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5667 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5668 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5670 "MatMatMultNumeric_MPIDense_MPIAIJ", 5671 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5673 PetscFunctionReturn(0); 5674 } 5675 EXTERN_C_END 5676 5677 #undef __FUNCT__ 5678 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5679 /*@ 5680 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5681 and "off-diagonal" part of the matrix in CSR format. 5682 5683 Collective on MPI_Comm 5684 5685 Input Parameters: 5686 + comm - MPI communicator 5687 . m - number of local rows (Cannot be PETSC_DECIDE) 5688 . n - This value should be the same as the local size used in creating the 5689 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5690 calculated if N is given) For square matrices n is almost always m. 5691 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5692 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5693 . i - row indices for "diagonal" portion of matrix 5694 . j - column indices 5695 . a - matrix values 5696 . oi - row indices for "off-diagonal" portion of matrix 5697 . oj - column indices 5698 - oa - matrix values 5699 5700 Output Parameter: 5701 . mat - the matrix 5702 5703 Level: advanced 5704 5705 Notes: 5706 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5707 must free the arrays once the matrix has been destroyed and not before. 5708 5709 The i and j indices are 0 based 5710 5711 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5712 5713 This sets local rows and cannot be used to set off-processor values. 
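
   Example usage: on two processes, a 4 by 4 matrix whose first two rows live on rank 0 could be
   assembled as follows (a minimal sketch; the sizes and values are illustrative only and are not
   taken from an existing PETSc example). On rank 0, which owns rows and columns 0 and 1:

      PetscInt    i[]  = {0,2,4}, j[]  = {0,1,0,1};
      PetscScalar a[]  = {2.0,-1.0,-1.0,2.0};
      PetscInt    oi[] = {0,0,1}, oj[] = {2};
      PetscScalar oa[] = {-1.0};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   with the analogous arrays on rank 1. As the calls to MatCreateSeqAIJWithArrays() in the body below
   suggest, j is given in the local numbering of the "diagonal" block while oj is given in global
   numbering; since the arrays are not copied, they must stay alive until A is destroyed.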
5714 5715 You cannot later use MatSetValues() to change values in this matrix. 5716 5717 .keywords: matrix, aij, compressed row, sparse, parallel 5718 5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5720 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5721 @*/ 5722 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5723 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5724 { 5725 PetscErrorCode ierr; 5726 Mat_MPIAIJ *maij; 5727 5728 PetscFunctionBegin; 5729 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5730 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5731 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5732 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5733 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5734 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5735 maij = (Mat_MPIAIJ*) (*mat)->data; 5736 maij->donotstash = PETSC_TRUE; 5737 (*mat)->preallocated = PETSC_TRUE; 5738 5739 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5740 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5741 5742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5743 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5744 5745 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 5750 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 PetscFunctionReturn(0); 5753 } 5754 5755 /* 5756 Special version for direct calls from Fortran 5757 */ 5758 #include <petsc-private/fortranimpl.h> 5759 5760 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5761 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5762 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5763 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5764 #endif 5765 5766 /* Change these macros so can be used in void function */ 5767 #undef CHKERRQ 5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5769 #undef SETERRQ2 5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ3 5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5773 #undef SETERRQ 5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5775 5776 EXTERN_C_BEGIN 5777 #undef __FUNCT__ 5778 #define __FUNCT__ "matsetvaluesmpiaij_" 5779 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5780 { 5781 Mat mat = *mmat; 5782 PetscInt m = *mm, n = *mn; 5783 InsertMode addv = *maddv; 5784 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5785 PetscScalar value; 5786 PetscErrorCode ierr; 5787 5788 MatCheckPreallocated(mat,1); 5789 if (mat->insertmode == NOT_SET_VALUES) { 5790 mat->insertmode = addv; 5791 } 5792 #if defined(PETSC_USE_DEBUG) 5793 else if (mat->insertmode != addv) { 5794 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5795 } 5796 #endif 5797 { 5798 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5799 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5800 PetscBool roworiented = aij->roworiented; 5801 5802 /* Some Variables required in the macro */ 5803 Mat A = aij->A; 5804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5805 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5806 MatScalar *aa = a->a; 5807 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5808 Mat B = aij->B; 5809 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5810 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5811 MatScalar *ba = b->a; 5812 5813 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5814 PetscInt nonew = a->nonew; 5815 MatScalar *ap1,*ap2; 5816 5817 PetscFunctionBegin; 5818 for (i=0; i<m; i++) { 5819 if (im[i] < 0) continue; 5820 #if defined(PETSC_USE_DEBUG) 5821 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5822 #endif 5823 if (im[i] >= rstart && im[i] < rend) { 5824 row = im[i] - rstart; 5825 lastcol1 = -1; 5826 rp1 = aj + ai[row]; 5827 ap1 = aa + ai[row]; 5828 rmax1 = aimax[row]; 5829 nrow1 = ailen[row]; 5830 low1 = 0; 5831 high1 = nrow1; 5832 lastcol2 = -1; 5833 rp2 = bj + bi[row]; 5834 ap2 = ba + bi[row]; 5835 rmax2 = bimax[row]; 5836 nrow2 = bilen[row]; 5837 low2 = 0; 5838 high2 = nrow2; 5839 5840 for (j=0; j<n; j++) { 5841 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5842 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5843 if (in[j] >= cstart && in[j] < cend){ 5844 col = in[j] - cstart; 5845 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5846 } else if (in[j] < 0) continue; 5847 #if defined(PETSC_USE_DEBUG) 5848 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5849 #endif 5850 else { 5851 if (mat->was_assembled) { 5852 if (!aij->colmap) { 5853 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5854 } 5855 #if defined (PETSC_USE_CTABLE) 5856 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5857 col--; 5858 #else 5859 col = aij->colmap[in[j]] - 1; 5860 #endif 5861 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5862 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5863 col = in[j]; 5864 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5865 B = aij->B; 5866 b = (Mat_SeqAIJ*)B->data; 5867 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5868 rp2 = bj + bi[row]; 5869 ap2 = ba + bi[row]; 5870 rmax2 = bimax[row]; 5871 nrow2 = bilen[row]; 5872 low2 = 0; 5873 high2 = nrow2; 5874 bm = aij->B->rmap->n; 5875 ba = b->a; 5876 } 5877 } else col = in[j]; 5878 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5879 } 5880 } 5881 } else { 5882 if (!aij->donotstash) { 5883 if (roworiented) { 5884 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5885 } else { 5886 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5887 } 5888 } 5889 } 5890 }} 5891 PetscFunctionReturnVoid(); 5892 } 5893 EXTERN_C_END 5894 5895
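
/*
   The stub above mirrors the logic of MatSetValues_MPIAIJ() so that the Fortran interface can call it
   directly. A sketch of the calling convention (the variable names are illustrative, not from an
   existing example): for a MATMPIAIJ matrix,

      matsetvaluesmpiaij_(&mat,&m,im,&n,in,v,&addv,&ierr);

   is intended to behave like

      ierr = MatSetValues(mat,m,im,n,in,v,addv);

   with the scalar arguments passed by reference, as Fortran requires; because the routine returns
   void, errors abort through CHKERRABORT() via the macro redefinitions above instead of returning an
   error code.
*/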