1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for (j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 108 { 109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 110 PetscErrorCode ierr; 111 PetscInt i,rstart,nrows,*rows; 112 113 PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined (PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++){ 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 373 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 374 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 375 #endif 376 PetscFunctionReturn(0); 377 } 378 379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 380 { \ 381 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 416 lastcol2 = col;\ 417 while (high2-low2 > 5) { \ 418 t = (low2+high2)/2; \ 419 if (rp2[t] > col) high2 = t; \ 420 else low2 = t; \ 421 } \ 422 for (_i=low2; _i<high2; _i++) { \ 423 if (rp2[_i] > col) break; \ 424 if (rp2[_i] == col) { \ 425 if (addv == ADD_VALUES) ap2[_i] += value; \ 426 else ap2[_i] = value; \ 427 goto b_noinsert; \ 428 } \ 429 } \ 430 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 433 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 434 N = nrow2++ - 1; b->nz++; high2++; \ 435 /* shift up all the later entries in this row */ \ 436 for (ii=N; ii>=_i; ii--) { \ 437 rp2[ii+1] = rp2[ii]; \ 438 ap2[ii+1] = ap2[ii]; \ 439 } \ 440 rp2[_i] = col; \ 441 ap2[_i] = value; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend){ 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined (PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend){ 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined (PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col --; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else { 620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 631 { 632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 633 PetscErrorCode ierr; 634 PetscInt nstash,reallocs; 635 InsertMode addv; 636 637 PetscFunctionBegin; 638 if (aij->donotstash || mat->nooffprocentries) { 639 PetscFunctionReturn(0); 640 } 641 642 /* make sure all processors are either in INSERTMODE or ADDMODE */ 643 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 644 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 645 mat->insertmode = addv; /* in case this processor had no cache */ 646 647 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 648 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 649 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 656 { 657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 658 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 659 PetscErrorCode ierr; 660 PetscMPIInt n; 661 PetscInt i,j,rstart,ncols,flg; 662 PetscInt *row,*col; 663 PetscBool other_disassembled; 664 PetscScalar *val; 665 InsertMode addv = mat->insertmode; 666 667 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 668 PetscFunctionBegin; 669 if (!aij->donotstash && !mat->nooffprocentries) { 670 while (1) { 671 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 672 if (!flg) break; 673 674 for (i=0; i<n;) { 675 /* Now identify the consecutive vals belonging to the same row */ 676 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 677 if (j < n) ncols = j-i; 678 else ncols = n-i; 679 /* Now assemble all these values with a single function call */ 680 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. */ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 706 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 708 709 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 724 { 725 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 726 PetscErrorCode ierr; 727 728 PetscFunctionBegin; 729 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 730 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatZeroRows_MPIAIJ" 736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 737 { 738 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 739 PetscErrorCode ierr; 740 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 741 PetscInt i,*owners = A->rmap->range; 742 PetscInt *nprocs,j,idx,nsends,row; 743 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 744 PetscInt *rvalues,count,base,slen,*source; 745 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 746 MPI_Comm comm = ((PetscObject)A)->comm; 747 MPI_Request *send_waits,*recv_waits; 748 MPI_Status recv_status,*send_status; 749 const PetscScalar *xx; 750 PetscScalar *bb; 751 #if defined(PETSC_DEBUG) 752 PetscBool found = PETSC_FALSE; 753 #endif 754 755 PetscFunctionBegin; 756 /* first count number of contributors to each processor */ 757 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 758 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 759 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 760 j = 0; 761 for (i=0; i<N; i++) { 762 if (lastidx > (idx = rows[i])) j = 0; 763 lastidx = idx; 764 for (; j<size; j++) { 765 if (idx >= owners[j] && idx < owners[j+1]) { 766 nprocs[2*j]++; 767 nprocs[2*j+1] = 1; 768 owner[i] = j; 769 #if defined(PETSC_DEBUG) 770 found = PETSC_TRUE; 771 #endif 772 break; 773 } 774 } 775 #if defined(PETSC_DEBUG) 776 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 777 found = PETSC_FALSE; 778 #endif 779 } 780 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 781 782 if (A->nooffproczerorows) { 783 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 784 nrecvs = nsends; 785 nmax = N; 786 } else { 787 /* inform other processors of number of messages and max length*/ 788 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 789 } 790 791 /* post receives: */ 792 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 793 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 794 for (i=0; i<nrecvs; i++) { 795 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 796 } 797 798 /* do sends: 799 1) starts[i] gives the starting index in svalues for stuff going to 800 the ith processor 801 */ 802 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 804 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 805 starts[0] = 0; 806 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 807 for (i=0; i<N; i++) { 808 svalues[starts[owner[i]]++] = rows[i]; 809 } 810 811 starts[0] = 0; 812 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 813 count = 0; 814 for (i=0; i<size; i++) { 815 if (nprocs[2*i+1]) { 816 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 817 } 818 } 819 ierr = PetscFree(starts);CHKERRQ(ierr); 820 821 base = owners[rank]; 822 823 /* wait on receives */ 824 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 825 count = nrecvs; slen = 0; 826 while (count) { 827 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 828 /* unpack receives into our local space */ 829 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 830 source[imdex] = recv_status.MPI_SOURCE; 831 lens[imdex] = n; 832 slen += n; 833 count--; 834 } 835 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 836 837 /* move the data into the send scatter */ 838 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 839 count = 0; 840 for (i=0; i<nrecvs; i++) { 841 values = rvalues + i*nmax; 842 for (j=0; j<lens[i]; j++) { 843 lrows[count++] = values[j] - base; 844 } 845 } 846 ierr = PetscFree(rvalues);CHKERRQ(ierr); 847 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 848 ierr = PetscFree(owner);CHKERRQ(ierr); 849 ierr = PetscFree(nprocs);CHKERRQ(ierr); 850 851 /* fix right hand side if needed */ 852 if (x && b) { 853 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 854 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 855 for (i=0; i<slen; i++) { 856 bb[lrows[i]] = diag*xx[lrows[i]]; 857 } 858 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 859 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 860 } 861 /* 862 Zero the required rows. If the "diagonal block" of the matrix 863 is square and the user wishes to set the diagonal we use separate 864 code so that MatSetValues() is not called for each diagonal allocating 865 new memory, thus calling lots of mallocs and slowing things down. 866 867 */ 868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 869 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 871 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 872 } else if (diag != 0.0) { 873 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 875 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 877 } 878 for (i = 0; i < slen; i++) { 879 row = lrows[i] + rstart; 880 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 881 } 882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 } else { 885 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(lrows);CHKERRQ(ierr); 888 889 /* wait on sends */ 890 if (nsends) { 891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 893 ierr = PetscFree(send_status);CHKERRQ(ierr); 894 } 895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 896 ierr = PetscFree(svalues);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 903 { 904 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 907 PetscInt i,*owners = A->rmap->range; 908 PetscInt *nprocs,j,idx,nsends; 909 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 910 PetscInt *rvalues,count,base,slen,*source; 911 PetscInt *lens,*lrows,*values,m; 912 MPI_Comm comm = ((PetscObject)A)->comm; 913 MPI_Request *send_waits,*recv_waits; 914 MPI_Status recv_status,*send_status; 915 const PetscScalar *xx; 916 PetscScalar *bb,*mask; 917 Vec xmask,lmask; 918 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 919 const PetscInt *aj, *ii,*ridx; 920 PetscScalar *aa; 921 #if defined(PETSC_DEBUG) 922 PetscBool found = PETSC_FALSE; 923 #endif 924 925 PetscFunctionBegin; 926 /* first count number of contributors to each processor */ 927 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 928 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 929 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 930 j = 0; 931 for (i=0; i<N; i++) { 932 if (lastidx > (idx = rows[i])) j = 0; 933 lastidx = idx; 934 for (; j<size; j++) { 935 if (idx >= owners[j] && idx < owners[j+1]) { 936 nprocs[2*j]++; 937 nprocs[2*j+1] = 1; 938 owner[i] = j; 939 #if defined(PETSC_DEBUG) 940 found = PETSC_TRUE; 941 #endif 942 break; 943 } 944 } 945 #if defined(PETSC_DEBUG) 946 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 947 found = PETSC_FALSE; 948 #endif 949 } 950 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 951 952 /* inform other processors of number of messages and max length*/ 953 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 954 955 /* post receives: */ 956 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 957 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 958 for (i=0; i<nrecvs; i++) { 959 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 960 } 961 962 /* do sends: 963 1) starts[i] gives the starting index in svalues for stuff going to 964 the ith processor 965 */ 966 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 967 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 968 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 969 starts[0] = 0; 970 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 971 for (i=0; i<N; i++) { 972 svalues[starts[owner[i]]++] = rows[i]; 973 } 974 975 starts[0] = 0; 976 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 977 count = 0; 978 for (i=0; i<size; i++) { 979 if (nprocs[2*i+1]) { 980 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 981 } 982 } 983 ierr = PetscFree(starts);CHKERRQ(ierr); 984 985 base = owners[rank]; 986 987 /* wait on receives */ 988 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 989 count = nrecvs; slen = 0; 990 while (count) { 991 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 992 /* unpack receives into our local space */ 993 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 994 source[imdex] = recv_status.MPI_SOURCE; 995 lens[imdex] = n; 996 slen += n; 997 count--; 998 } 999 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1000 1001 /* move the data into the send scatter */ 1002 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1003 count = 0; 1004 for (i=0; i<nrecvs; i++) { 1005 values = rvalues + i*nmax; 1006 for (j=0; j<lens[i]; j++) { 1007 lrows[count++] = values[j] - base; 1008 } 1009 } 1010 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1011 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1012 ierr = PetscFree(owner);CHKERRQ(ierr); 1013 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1014 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1015 1016 /* zero diagonal part of matrix */ 1017 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1018 1019 /* handle off diagonal part of matrix */ 1020 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1022 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1023 for (i=0; i<slen; i++) { 1024 bb[lrows[i]] = 1; 1025 } 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use){ 1046 m = aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++){ 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 1191 /* Easy test: symmetric diagonal block */ 1192 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1193 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1194 if (!*f) PetscFunctionReturn(0); 1195 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1197 if (size == 1) PetscFunctionReturn(0); 1198 1199 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1200 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1201 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1202 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1203 for (i=0; i<first; i++) notme[i] = i; 1204 for (i=last; i<M; i++) notme[i-last+first] = i; 1205 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1206 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1207 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1208 Aoff = Aoffs[0]; 1209 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1210 Boff = Boffs[0]; 1211 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1213 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1215 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1216 ierr = PetscFree(notme);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 EXTERN_C_END 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1224 { 1225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* do nondiagonal part */ 1230 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1231 /* send it on its way */ 1232 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 /* do local part */ 1234 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1235 /* receive remote parts */ 1236 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 /* 1241 This only works correctly for square matrices where the subblock A->A is the 1242 diagonal block 1243 */ 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1247 { 1248 PetscErrorCode ierr; 1249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1250 1251 PetscFunctionBegin; 1252 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1253 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1254 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatScale_MPIAIJ" 1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1261 { 1262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1267 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDestroy_MPIAIJ" 1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 #if defined(PETSC_USE_LOG) 1280 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1281 #endif 1282 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1283 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1284 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1285 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1286 #if defined (PETSC_USE_CTABLE) 1287 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1288 #else 1289 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1292 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1293 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1294 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1295 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1316 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1319 int fd; 1320 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1321 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1322 PetscScalar *column_values; 1323 PetscInt message_count,flowcontrolcount; 1324 FILE *file; 1325 1326 PetscFunctionBegin; 1327 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1328 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1329 nz = A->nz + B->nz; 1330 if (!rank) { 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = mat->rmap->N; 1333 header[2] = mat->cmap->N; 1334 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1335 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1337 /* get largest number of rows any processor has */ 1338 rlen = mat->rmap->n; 1339 range = mat->rmap->range; 1340 for (i=1; i<size; i++) { 1341 rlen = PetscMax(rlen,range[i+1] - range[i]); 1342 } 1343 } else { 1344 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 rlen = mat->rmap->n; 1346 } 1347 1348 /* load up the local row counts */ 1349 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1350 for (i=0; i<mat->rmap->n; i++) { 1351 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1352 } 1353 1354 /* store the row lengths to the file */ 1355 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 for (i=1; i<size; i++) { 1359 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1360 rlen = range[i+1] - range[i]; 1361 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1362 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1363 } 1364 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1367 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1368 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1369 } 1370 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1371 1372 /* load up the local column indices */ 1373 nzmax = nz; /* )th processor needs space a largest processor needs */ 1374 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1375 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1376 cnt = 0; 1377 for (i=0; i<mat->rmap->n; i++) { 1378 for (j=B->i[i]; j<B->i[i+1]; j++) { 1379 if ( (col = garray[B->j[j]]) > cstart) break; 1380 column_indices[cnt++] = col; 1381 } 1382 for (k=A->i[i]; k<A->i[i+1]; k++) { 1383 column_indices[cnt++] = A->j[k] + cstart; 1384 } 1385 for (; j<B->i[i+1]; j++) { 1386 column_indices[cnt++] = garray[B->j[j]]; 1387 } 1388 } 1389 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1390 1391 /* store the column indices to the file */ 1392 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1393 if (!rank) { 1394 MPI_Status status; 1395 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1396 for (i=1; i<size; i++) { 1397 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1398 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1399 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1400 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1401 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1402 } 1403 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1406 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1407 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1408 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1409 } 1410 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1411 1412 /* load up the local column values */ 1413 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1414 cnt = 0; 1415 for (i=0; i<mat->rmap->n; i++) { 1416 for (j=B->i[i]; j<B->i[i+1]; j++) { 1417 if ( garray[B->j[j]] > cstart) break; 1418 column_values[cnt++] = B->a[j]; 1419 } 1420 for (k=A->i[i]; k<A->i[i+1]; k++) { 1421 column_values[cnt++] = A->a[k]; 1422 } 1423 for (; j<B->i[i+1]; j++) { 1424 column_values[cnt++] = B->a[j]; 1425 } 1426 } 1427 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1428 1429 /* store the column values to the file */ 1430 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1431 if (!rank) { 1432 MPI_Status status; 1433 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1434 for (i=1; i<size; i++) { 1435 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1436 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1437 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1438 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1439 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1440 } 1441 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1442 } else { 1443 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1444 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1445 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1446 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(column_values);CHKERRQ(ierr); 1449 1450 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1451 if (file) { 1452 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1453 } 1454 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1461 { 1462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1463 PetscErrorCode ierr; 1464 PetscMPIInt rank = aij->rank,size = aij->size; 1465 PetscBool isdraw,iascii,isbinary; 1466 PetscViewer sviewer; 1467 PetscViewerFormat format; 1468 1469 PetscFunctionBegin; 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1473 if (iascii) { 1474 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1475 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1476 MatInfo info; 1477 PetscBool inodes; 1478 1479 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1480 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1483 if (!inodes) { 1484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1485 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1488 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1489 } 1490 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1492 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1493 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. */ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1743 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1744 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1746 1747 /* Invert column permutation to find out where my columns should go */ 1748 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1749 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1750 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1751 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1752 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1753 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1754 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1755 1756 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1757 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1758 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1759 1760 /* Find out where my gcols should go */ 1761 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1762 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1763 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1764 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1765 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1766 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1767 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1768 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1769 1770 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1771 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1772 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1774 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1775 for (i=0; i<m; i++) { 1776 PetscInt row = rdest[i],rowner; 1777 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1778 for (j=ai[i]; j<ai[i+1]; j++) { 1779 PetscInt cowner,col = cdest[aj[j]]; 1780 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1781 if (rowner == cowner) dnnz[i]++; 1782 else onnz[i]++; 1783 } 1784 for (j=bi[i]; j<bi[i+1]; j++) { 1785 PetscInt cowner,col = gcdest[bj[j]]; 1786 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1787 if (rowner == cowner) dnnz[i]++; 1788 else onnz[i]++; 1789 } 1790 } 1791 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1792 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1793 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1794 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1795 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1796 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1797 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1798 1799 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1800 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1801 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1802 for (i=0; i<m; i++) { 1803 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1804 PetscInt rowlen; 1805 rowlen = ai[i+1] - ai[i]; 1806 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1808 rowlen = bi[i+1] - bi[i]; 1809 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1810 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1811 } 1812 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1813 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1814 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1815 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1816 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1817 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1818 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1819 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1820 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1821 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1822 *B = Aperm; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1828 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1829 { 1830 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1831 Mat A = mat->A,B = mat->B; 1832 PetscErrorCode ierr; 1833 PetscReal isend[5],irecv[5]; 1834 1835 PetscFunctionBegin; 1836 info->block_size = 1.0; 1837 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1839 isend[3] = info->memory; isend[4] = info->mallocs; 1840 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1841 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1842 isend[3] += info->memory; isend[4] += info->mallocs; 1843 if (flag == MAT_LOCAL) { 1844 info->nz_used = isend[0]; 1845 info->nz_allocated = isend[1]; 1846 info->nz_unneeded = isend[2]; 1847 info->memory = isend[3]; 1848 info->mallocs = isend[4]; 1849 } else if (flag == MAT_GLOBAL_MAX) { 1850 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1851 info->nz_used = irecv[0]; 1852 info->nz_allocated = irecv[1]; 1853 info->nz_unneeded = irecv[2]; 1854 info->memory = irecv[3]; 1855 info->mallocs = irecv[4]; 1856 } else if (flag == MAT_GLOBAL_SUM) { 1857 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1858 info->nz_used = irecv[0]; 1859 info->nz_allocated = irecv[1]; 1860 info->nz_unneeded = irecv[2]; 1861 info->memory = irecv[3]; 1862 info->mallocs = irecv[4]; 1863 } 1864 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1865 info->fill_ratio_needed = 0; 1866 info->factor_mallocs = 0; 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatSetOption_MPIAIJ" 1872 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1875 PetscErrorCode ierr; 1876 1877 PetscFunctionBegin; 1878 switch (op) { 1879 case MAT_NEW_NONZERO_LOCATIONS: 1880 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1881 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1882 case MAT_KEEP_NONZERO_PATTERN: 1883 case MAT_NEW_NONZERO_LOCATION_ERR: 1884 case MAT_USE_INODES: 1885 case MAT_IGNORE_ZERO_ENTRIES: 1886 MatCheckPreallocated(A,1); 1887 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1888 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1889 break; 1890 case MAT_ROW_ORIENTED: 1891 a->roworiented = flg; 1892 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1893 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1894 break; 1895 case MAT_NEW_DIAGONALS: 1896 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1897 break; 1898 case MAT_IGNORE_OFF_PROC_ENTRIES: 1899 a->donotstash = flg; 1900 break; 1901 case MAT_SPD: 1902 A->spd_set = PETSC_TRUE; 1903 A->spd = flg; 1904 if (flg) { 1905 A->symmetric = PETSC_TRUE; 1906 A->structurally_symmetric = PETSC_TRUE; 1907 A->symmetric_set = PETSC_TRUE; 1908 A->structurally_symmetric_set = PETSC_TRUE; 1909 } 1910 break; 1911 case MAT_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_STRUCTURALLY_SYMMETRIC: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_HERMITIAN: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 case MAT_SYMMETRY_ETERNAL: 1921 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1922 break; 1923 default: 1924 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatGetRow_MPIAIJ" 1931 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1932 { 1933 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1934 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1935 PetscErrorCode ierr; 1936 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1937 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1938 PetscInt *cmap,*idx_p; 1939 1940 PetscFunctionBegin; 1941 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1942 mat->getrowactive = PETSC_TRUE; 1943 1944 if (!mat->rowvalues && (idx || v)) { 1945 /* 1946 allocate enough space to hold information from the longest row. 1947 */ 1948 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1949 PetscInt max = 1,tmp; 1950 for (i=0; i<matin->rmap->n; i++) { 1951 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1952 if (max < tmp) { max = tmp; } 1953 } 1954 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1955 } 1956 1957 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1958 lrow = row - rstart; 1959 1960 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1961 if (!v) {pvA = 0; pvB = 0;} 1962 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1963 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1964 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1965 nztot = nzA + nzB; 1966 1967 cmap = mat->garray; 1968 if (v || idx) { 1969 if (nztot) { 1970 /* Sort by increasing column numbers, assuming A and B already sorted */ 1971 PetscInt imark = -1; 1972 if (v) { 1973 *v = v_p = mat->rowvalues; 1974 for (i=0; i<nzB; i++) { 1975 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1976 else break; 1977 } 1978 imark = i; 1979 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1980 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1981 } 1982 if (idx) { 1983 *idx = idx_p = mat->rowindices; 1984 if (imark > -1) { 1985 for (i=0; i<imark; i++) { 1986 idx_p[i] = cmap[cworkB[i]]; 1987 } 1988 } else { 1989 for (i=0; i<nzB; i++) { 1990 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1991 else break; 1992 } 1993 imark = i; 1994 } 1995 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1996 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1997 } 1998 } else { 1999 if (idx) *idx = 0; 2000 if (v) *v = 0; 2001 } 2002 } 2003 *nz = nztot; 2004 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2005 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2011 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 2015 PetscFunctionBegin; 2016 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2017 aij->getrowactive = PETSC_FALSE; 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatNorm_MPIAIJ" 2023 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2024 { 2025 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2026 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2027 PetscErrorCode ierr; 2028 PetscInt i,j,cstart = mat->cmap->rstart; 2029 PetscReal sum = 0.0; 2030 MatScalar *v; 2031 2032 PetscFunctionBegin; 2033 if (aij->size == 1) { 2034 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2035 } else { 2036 if (type == NORM_FROBENIUS) { 2037 v = amat->a; 2038 for (i=0; i<amat->nz; i++) { 2039 #if defined(PETSC_USE_COMPLEX) 2040 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2041 #else 2042 sum += (*v)*(*v); v++; 2043 #endif 2044 } 2045 v = bmat->a; 2046 for (i=0; i<bmat->nz; i++) { 2047 #if defined(PETSC_USE_COMPLEX) 2048 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2049 #else 2050 sum += (*v)*(*v); v++; 2051 #endif 2052 } 2053 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2054 *norm = PetscSqrtReal(*norm); 2055 } else if (type == NORM_1) { /* max column norm */ 2056 PetscReal *tmp,*tmp2; 2057 PetscInt *jj,*garray = aij->garray; 2058 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2059 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2060 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2061 *norm = 0.0; 2062 v = amat->a; jj = amat->j; 2063 for (j=0; j<amat->nz; j++) { 2064 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2065 } 2066 v = bmat->a; jj = bmat->j; 2067 for (j=0; j<bmat->nz; j++) { 2068 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2069 } 2070 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2071 for (j=0; j<mat->cmap->N; j++) { 2072 if (tmp2[j] > *norm) *norm = tmp2[j]; 2073 } 2074 ierr = PetscFree(tmp);CHKERRQ(ierr); 2075 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2076 } else if (type == NORM_INFINITY) { /* max row norm */ 2077 PetscReal ntemp = 0.0; 2078 for (j=0; j<aij->A->rmap->n; j++) { 2079 v = amat->a + amat->i[j]; 2080 sum = 0.0; 2081 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2082 sum += PetscAbsScalar(*v); v++; 2083 } 2084 v = bmat->a + bmat->i[j]; 2085 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2086 sum += PetscAbsScalar(*v); v++; 2087 } 2088 if (sum > ntemp) ntemp = sum; 2089 } 2090 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2091 } else { 2092 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2093 } 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatTranspose_MPIAIJ" 2100 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2101 { 2102 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2103 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2104 PetscErrorCode ierr; 2105 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2106 PetscInt cstart=A->cmap->rstart,ncol; 2107 Mat B; 2108 MatScalar *array; 2109 2110 PetscFunctionBegin; 2111 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2112 2113 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2114 ai = Aloc->i; aj = Aloc->j; 2115 bi = Bloc->i; bj = Bloc->j; 2116 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2117 PetscInt *d_nnz,*g_nnz,*o_nnz; 2118 PetscSFNode *oloc; 2119 PETSC_UNUSED PetscSF sf; 2120 2121 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2122 /* compute d_nnz for preallocation */ 2123 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2124 for (i=0; i<ai[ma]; i++) { 2125 d_nnz[aj[i]] ++; 2126 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 /* compute local off-diagonal contributions */ 2129 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2131 /* map those to global */ 2132 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2134 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2135 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2136 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2137 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2138 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2139 2140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2141 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2142 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2143 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2146 } else { 2147 B = *matout; 2148 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2149 for (i=0; i<ai[ma]; i++){ 2150 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2151 } 2152 } 2153 2154 /* copy over the A part */ 2155 array = Aloc->a; 2156 row = A->rmap->rstart; 2157 for (i=0; i<ma; i++) { 2158 ncol = ai[i+1]-ai[i]; 2159 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2160 row++; array += ncol; aj += ncol; 2161 } 2162 aj = Aloc->j; 2163 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2164 2165 /* copy over the B part */ 2166 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2167 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2168 array = Bloc->a; 2169 row = A->rmap->rstart; 2170 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2171 cols_tmp = cols; 2172 for (i=0; i<mb; i++) { 2173 ncol = bi[i+1]-bi[i]; 2174 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2175 row++; array += ncol; cols_tmp += ncol; 2176 } 2177 ierr = PetscFree(cols);CHKERRQ(ierr); 2178 2179 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2182 *matout = B; 2183 } else { 2184 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2185 } 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2191 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2192 { 2193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2194 Mat a = aij->A,b = aij->B; 2195 PetscErrorCode ierr; 2196 PetscInt s1,s2,s3; 2197 2198 PetscFunctionBegin; 2199 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2200 if (rr) { 2201 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2202 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2203 /* Overlap communication with computation. */ 2204 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2205 } 2206 if (ll) { 2207 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2208 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2209 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2210 } 2211 /* scale the diagonal block */ 2212 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2213 2214 if (rr) { 2215 /* Do a scatter end and then right scale the off-diagonal block */ 2216 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2217 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2218 } 2219 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2225 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2226 { 2227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2228 PetscErrorCode ierr; 2229 2230 PetscFunctionBegin; 2231 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatEqual_MPIAIJ" 2237 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2238 { 2239 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2240 Mat a,b,c,d; 2241 PetscBool flg; 2242 PetscErrorCode ierr; 2243 2244 PetscFunctionBegin; 2245 a = matA->A; b = matA->B; 2246 c = matB->A; d = matB->B; 2247 2248 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2249 if (flg) { 2250 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2251 } 2252 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatCopy_MPIAIJ" 2258 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2259 { 2260 PetscErrorCode ierr; 2261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2262 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2263 2264 PetscFunctionBegin; 2265 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2266 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2267 /* because of the column compression in the off-processor part of the matrix a->B, 2268 the number of columns in a->B and b->B may be different, hence we cannot call 2269 the MatCopy() directly on the two parts. If need be, we can provide a more 2270 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2271 then copying the submatrices */ 2272 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2273 } else { 2274 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2275 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatSetUp_MPIAIJ" 2282 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2283 { 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2293 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2294 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2295 { 2296 PetscInt i,m=Y->rmap->N; 2297 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2298 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2299 const PetscInt *xi = x->i,*yi = y->i; 2300 2301 PetscFunctionBegin; 2302 /* Set the number of nonzeros in the new matrix */ 2303 for (i=0; i<m; i++) { 2304 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2305 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2306 nnz[i] = 0; 2307 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2308 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2309 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2310 nnz[i]++; 2311 } 2312 for (; k<nzy; k++) nnz[i]++; 2313 } 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatAXPY_MPIAIJ" 2319 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2320 { 2321 PetscErrorCode ierr; 2322 PetscInt i; 2323 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2324 PetscBLASInt bnz,one=1; 2325 Mat_SeqAIJ *x,*y; 2326 2327 PetscFunctionBegin; 2328 if (str == SAME_NONZERO_PATTERN) { 2329 PetscScalar alpha = a; 2330 x = (Mat_SeqAIJ *)xx->A->data; 2331 y = (Mat_SeqAIJ *)yy->A->data; 2332 bnz = PetscBLASIntCast(x->nz); 2333 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2334 x = (Mat_SeqAIJ *)xx->B->data; 2335 y = (Mat_SeqAIJ *)yy->B->data; 2336 bnz = PetscBLASIntCast(x->nz); 2337 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2338 } else if (str == SUBSET_NONZERO_PATTERN) { 2339 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2340 2341 x = (Mat_SeqAIJ *)xx->B->data; 2342 y = (Mat_SeqAIJ *)yy->B->data; 2343 if (y->xtoy && y->XtoY != xx->B) { 2344 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2345 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2346 } 2347 if (!y->xtoy) { /* get xtoy */ 2348 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2349 y->XtoY = xx->B; 2350 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2351 } 2352 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2353 } else { 2354 Mat B; 2355 PetscInt *nnz_d,*nnz_o; 2356 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2357 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2358 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2359 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2360 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2361 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2362 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2363 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2364 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2365 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2366 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2367 ierr = MatHeaderReplace(Y,B); 2368 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2369 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2370 } 2371 PetscFunctionReturn(0); 2372 } 2373 2374 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatConjugate_MPIAIJ" 2378 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2379 { 2380 #if defined(PETSC_USE_COMPLEX) 2381 PetscErrorCode ierr; 2382 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2383 2384 PetscFunctionBegin; 2385 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2386 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2387 #else 2388 PetscFunctionBegin; 2389 #endif 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatRealPart_MPIAIJ" 2395 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2396 { 2397 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2402 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2403 PetscFunctionReturn(0); 2404 } 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2408 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2409 { 2410 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2415 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #ifdef PETSC_HAVE_PBGL 2420 2421 #include <boost/parallel/mpi/bsp_process_group.hpp> 2422 #include <boost/graph/distributed/ilu_default_graph.hpp> 2423 #include <boost/graph/distributed/ilu_0_block.hpp> 2424 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2425 #include <boost/graph/distributed/petsc/interface.hpp> 2426 #include <boost/multi_array.hpp> 2427 #include <boost/parallel/distributed_property_map->hpp> 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2431 /* 2432 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2433 */ 2434 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2435 { 2436 namespace petsc = boost::distributed::petsc; 2437 2438 namespace graph_dist = boost::graph::distributed; 2439 using boost::graph::distributed::ilu_default::process_group_type; 2440 using boost::graph::ilu_permuted; 2441 2442 PetscBool row_identity, col_identity; 2443 PetscContainer c; 2444 PetscInt m, n, M, N; 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2449 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2450 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2451 if (!row_identity || !col_identity) { 2452 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2453 } 2454 2455 process_group_type pg; 2456 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2457 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2458 lgraph_type& level_graph = *lgraph_p; 2459 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2460 2461 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2462 ilu_permuted(level_graph); 2463 2464 /* put together the new matrix */ 2465 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2466 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2467 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2468 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2469 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2470 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2471 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2473 2474 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2475 ierr = PetscContainerSetPointer(c, lgraph_p); 2476 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2477 ierr = PetscContainerDestroy(&c); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2483 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2484 { 2485 PetscFunctionBegin; 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "MatSolve_MPIAIJ" 2491 /* 2492 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2493 */ 2494 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2495 { 2496 namespace graph_dist = boost::graph::distributed; 2497 2498 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2499 lgraph_type* lgraph_p; 2500 PetscContainer c; 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2505 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2506 ierr = VecCopy(b, x);CHKERRQ(ierr); 2507 2508 PetscScalar* array_x; 2509 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2510 PetscInt sx; 2511 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2512 2513 PetscScalar* array_b; 2514 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2515 PetscInt sb; 2516 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2517 2518 lgraph_type& level_graph = *lgraph_p; 2519 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2520 2521 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2522 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2523 ref_x(array_x, boost::extents[num_vertices(graph)]); 2524 2525 typedef boost::iterator_property_map<array_ref_type::iterator, 2526 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2527 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2528 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2529 2530 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2531 2532 PetscFunctionReturn(0); 2533 } 2534 #endif 2535 2536 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2537 PetscInt nzlocal,nsends,nrecvs; 2538 PetscMPIInt *send_rank,*recv_rank; 2539 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2540 PetscScalar *sbuf_a,**rbuf_a; 2541 PetscErrorCode (*Destroy)(Mat); 2542 } Mat_Redundant; 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2546 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2550 PetscInt i; 2551 2552 PetscFunctionBegin; 2553 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2554 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2555 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2556 for (i=0; i<redund->nrecvs; i++){ 2557 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2558 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2559 } 2560 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2561 ierr = PetscFree(redund);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatDestroy_MatRedundant" 2567 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2568 { 2569 PetscErrorCode ierr; 2570 PetscContainer container; 2571 Mat_Redundant *redund=PETSC_NULL; 2572 2573 PetscFunctionBegin; 2574 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2575 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2576 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2577 A->ops->destroy = redund->Destroy; 2578 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2579 if (A->ops->destroy) { 2580 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2581 } 2582 PetscFunctionReturn(0); 2583 } 2584 2585 #undef __FUNCT__ 2586 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2587 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2588 { 2589 PetscMPIInt rank,size; 2590 MPI_Comm comm=((PetscObject)mat)->comm; 2591 PetscErrorCode ierr; 2592 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2593 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2594 PetscInt *rowrange=mat->rmap->range; 2595 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2596 Mat A=aij->A,B=aij->B,C=*matredundant; 2597 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2598 PetscScalar *sbuf_a; 2599 PetscInt nzlocal=a->nz+b->nz; 2600 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2601 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2602 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2603 MatScalar *aworkA,*aworkB; 2604 PetscScalar *vals; 2605 PetscMPIInt tag1,tag2,tag3,imdex; 2606 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2607 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2608 MPI_Status recv_status,*send_status; 2609 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2610 PetscInt **rbuf_j=PETSC_NULL; 2611 PetscScalar **rbuf_a=PETSC_NULL; 2612 Mat_Redundant *redund=PETSC_NULL; 2613 PetscContainer container; 2614 2615 PetscFunctionBegin; 2616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2618 2619 if (reuse == MAT_REUSE_MATRIX) { 2620 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2621 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2622 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2623 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2624 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2625 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2626 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2627 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2628 2629 nsends = redund->nsends; 2630 nrecvs = redund->nrecvs; 2631 send_rank = redund->send_rank; 2632 recv_rank = redund->recv_rank; 2633 sbuf_nz = redund->sbuf_nz; 2634 rbuf_nz = redund->rbuf_nz; 2635 sbuf_j = redund->sbuf_j; 2636 sbuf_a = redund->sbuf_a; 2637 rbuf_j = redund->rbuf_j; 2638 rbuf_a = redund->rbuf_a; 2639 } 2640 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 PetscMPIInt subrank,subsize; 2643 PetscInt nleftover,np_subcomm; 2644 /* get the destination processors' id send_rank, nsends and nrecvs */ 2645 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2646 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2647 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2648 np_subcomm = size/nsubcomm; 2649 nleftover = size - nsubcomm*np_subcomm; 2650 nsends = 0; nrecvs = 0; 2651 for (i=0; i<size; i++){ /* i=rank*/ 2652 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2653 send_rank[nsends] = i; nsends++; 2654 recv_rank[nrecvs++] = i; 2655 } 2656 } 2657 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2658 i = size-nleftover-1; 2659 j = 0; 2660 while (j < nsubcomm - nleftover){ 2661 send_rank[nsends++] = i; 2662 i--; j++; 2663 } 2664 } 2665 2666 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2667 for (i=0; i<nleftover; i++){ 2668 recv_rank[nrecvs++] = size-nleftover+i; 2669 } 2670 } 2671 2672 /* allocate sbuf_j, sbuf_a */ 2673 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2674 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2675 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2676 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2677 2678 /* copy mat's local entries into the buffers */ 2679 if (reuse == MAT_INITIAL_MATRIX){ 2680 rownz_max = 0; 2681 rptr = sbuf_j; 2682 cols = sbuf_j + rend-rstart + 1; 2683 vals = sbuf_a; 2684 rptr[0] = 0; 2685 for (i=0; i<rend-rstart; i++){ 2686 row = i + rstart; 2687 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2688 ncols = nzA + nzB; 2689 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2690 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2691 /* load the column indices for this row into cols */ 2692 lwrite = 0; 2693 for (l=0; l<nzB; l++) { 2694 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2695 vals[lwrite] = aworkB[l]; 2696 cols[lwrite++] = ctmp; 2697 } 2698 } 2699 for (l=0; l<nzA; l++){ 2700 vals[lwrite] = aworkA[l]; 2701 cols[lwrite++] = cstart + cworkA[l]; 2702 } 2703 for (l=0; l<nzB; l++) { 2704 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2705 vals[lwrite] = aworkB[l]; 2706 cols[lwrite++] = ctmp; 2707 } 2708 } 2709 vals += ncols; 2710 cols += ncols; 2711 rptr[i+1] = rptr[i] + ncols; 2712 if (rownz_max < ncols) rownz_max = ncols; 2713 } 2714 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2715 } else { /* only copy matrix values into sbuf_a */ 2716 rptr = sbuf_j; 2717 vals = sbuf_a; 2718 rptr[0] = 0; 2719 for (i=0; i<rend-rstart; i++){ 2720 row = i + rstart; 2721 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2722 ncols = nzA + nzB; 2723 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2724 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2725 lwrite = 0; 2726 for (l=0; l<nzB; l++) { 2727 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2728 } 2729 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2730 for (l=0; l<nzB; l++) { 2731 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2732 } 2733 vals += ncols; 2734 rptr[i+1] = rptr[i] + ncols; 2735 } 2736 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2737 2738 /* send nzlocal to others, and recv other's nzlocal */ 2739 /*--------------------------------------------------*/ 2740 if (reuse == MAT_INITIAL_MATRIX){ 2741 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2742 s_waits2 = s_waits3 + nsends; 2743 s_waits1 = s_waits2 + nsends; 2744 r_waits1 = s_waits1 + nsends; 2745 r_waits2 = r_waits1 + nrecvs; 2746 r_waits3 = r_waits2 + nrecvs; 2747 } else { 2748 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2749 r_waits3 = s_waits3 + nsends; 2750 } 2751 2752 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2753 if (reuse == MAT_INITIAL_MATRIX){ 2754 /* get new tags to keep the communication clean */ 2755 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2757 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2758 2759 /* post receives of other's nzlocal */ 2760 for (i=0; i<nrecvs; i++){ 2761 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2762 } 2763 /* send nzlocal to others */ 2764 for (i=0; i<nsends; i++){ 2765 sbuf_nz[i] = nzlocal; 2766 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2767 } 2768 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2769 count = nrecvs; 2770 while (count) { 2771 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2772 recv_rank[imdex] = recv_status.MPI_SOURCE; 2773 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2774 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2775 2776 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2777 rbuf_nz[imdex] += i + 2; 2778 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2779 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2780 count--; 2781 } 2782 /* wait on sends of nzlocal */ 2783 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2784 /* send mat->i,j to others, and recv from other's */ 2785 /*------------------------------------------------*/ 2786 for (i=0; i<nsends; i++){ 2787 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2788 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2789 } 2790 /* wait on receives of mat->i,j */ 2791 /*------------------------------*/ 2792 count = nrecvs; 2793 while (count) { 2794 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2795 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2796 count--; 2797 } 2798 /* wait on sends of mat->i,j */ 2799 /*---------------------------*/ 2800 if (nsends) { 2801 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2802 } 2803 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2804 2805 /* post receives, send and receive mat->a */ 2806 /*----------------------------------------*/ 2807 for (imdex=0; imdex<nrecvs; imdex++) { 2808 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2809 } 2810 for (i=0; i<nsends; i++){ 2811 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2812 } 2813 count = nrecvs; 2814 while (count) { 2815 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2816 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2817 count--; 2818 } 2819 if (nsends) { 2820 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2821 } 2822 2823 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2824 2825 /* create redundant matrix */ 2826 /*-------------------------*/ 2827 if (reuse == MAT_INITIAL_MATRIX){ 2828 /* compute rownz_max for preallocation */ 2829 for (imdex=0; imdex<nrecvs; imdex++){ 2830 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2831 rptr = rbuf_j[imdex]; 2832 for (i=0; i<j; i++){ 2833 ncols = rptr[i+1] - rptr[i]; 2834 if (rownz_max < ncols) rownz_max = ncols; 2835 } 2836 } 2837 2838 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2839 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2840 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2841 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2842 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2843 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2844 } else { 2845 C = *matredundant; 2846 } 2847 2848 /* insert local matrix entries */ 2849 rptr = sbuf_j; 2850 cols = sbuf_j + rend-rstart + 1; 2851 vals = sbuf_a; 2852 for (i=0; i<rend-rstart; i++){ 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 /* insert received matrix entries */ 2860 for (imdex=0; imdex<nrecvs; imdex++){ 2861 rstart = rowrange[recv_rank[imdex]]; 2862 rend = rowrange[recv_rank[imdex]+1]; 2863 rptr = rbuf_j[imdex]; 2864 cols = rbuf_j[imdex] + rend-rstart + 1; 2865 vals = rbuf_a[imdex]; 2866 for (i=0; i<rend-rstart; i++){ 2867 row = i + rstart; 2868 ncols = rptr[i+1] - rptr[i]; 2869 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2870 vals += ncols; 2871 cols += ncols; 2872 } 2873 } 2874 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2875 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2877 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2878 if (reuse == MAT_INITIAL_MATRIX) { 2879 PetscContainer container; 2880 *matredundant = C; 2881 /* create a supporting struct and attach it to C for reuse */ 2882 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2883 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2884 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2885 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2886 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2887 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2888 2889 redund->nzlocal = nzlocal; 2890 redund->nsends = nsends; 2891 redund->nrecvs = nrecvs; 2892 redund->send_rank = send_rank; 2893 redund->recv_rank = recv_rank; 2894 redund->sbuf_nz = sbuf_nz; 2895 redund->rbuf_nz = rbuf_nz; 2896 redund->sbuf_j = sbuf_j; 2897 redund->sbuf_a = sbuf_a; 2898 redund->rbuf_j = rbuf_j; 2899 redund->rbuf_a = rbuf_a; 2900 2901 redund->Destroy = C->ops->destroy; 2902 C->ops->destroy = MatDestroy_MatRedundant; 2903 } 2904 PetscFunctionReturn(0); 2905 } 2906 2907 #undef __FUNCT__ 2908 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2909 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2910 { 2911 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2912 PetscErrorCode ierr; 2913 PetscInt i,*idxb = 0; 2914 PetscScalar *va,*vb; 2915 Vec vtmp; 2916 2917 PetscFunctionBegin; 2918 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2919 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2920 if (idx) { 2921 for (i=0; i<A->rmap->n; i++) { 2922 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2923 } 2924 } 2925 2926 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2927 if (idx) { 2928 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2929 } 2930 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2931 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2932 2933 for (i=0; i<A->rmap->n; i++){ 2934 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2935 va[i] = vb[i]; 2936 if (idx) idx[i] = a->garray[idxb[i]]; 2937 } 2938 } 2939 2940 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2941 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2942 ierr = PetscFree(idxb);CHKERRQ(ierr); 2943 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2949 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2950 { 2951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2952 PetscErrorCode ierr; 2953 PetscInt i,*idxb = 0; 2954 PetscScalar *va,*vb; 2955 Vec vtmp; 2956 2957 PetscFunctionBegin; 2958 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2959 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2960 if (idx) { 2961 for (i=0; i<A->cmap->n; i++) { 2962 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2963 } 2964 } 2965 2966 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2967 if (idx) { 2968 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2969 } 2970 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2971 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2972 2973 for (i=0; i<A->rmap->n; i++){ 2974 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2975 va[i] = vb[i]; 2976 if (idx) idx[i] = a->garray[idxb[i]]; 2977 } 2978 } 2979 2980 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2981 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2982 ierr = PetscFree(idxb);CHKERRQ(ierr); 2983 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2989 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2990 { 2991 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2992 PetscInt n = A->rmap->n; 2993 PetscInt cstart = A->cmap->rstart; 2994 PetscInt *cmap = mat->garray; 2995 PetscInt *diagIdx, *offdiagIdx; 2996 Vec diagV, offdiagV; 2997 PetscScalar *a, *diagA, *offdiagA; 2998 PetscInt r; 2999 PetscErrorCode ierr; 3000 3001 PetscFunctionBegin; 3002 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3003 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3004 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3005 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3006 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3007 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3008 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3009 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3010 for (r = 0; r < n; ++r) { 3011 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3012 a[r] = diagA[r]; 3013 idx[r] = cstart + diagIdx[r]; 3014 } else { 3015 a[r] = offdiagA[r]; 3016 idx[r] = cmap[offdiagIdx[r]]; 3017 } 3018 } 3019 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3020 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3021 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3022 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3023 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3024 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3030 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3031 { 3032 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3033 PetscInt n = A->rmap->n; 3034 PetscInt cstart = A->cmap->rstart; 3035 PetscInt *cmap = mat->garray; 3036 PetscInt *diagIdx, *offdiagIdx; 3037 Vec diagV, offdiagV; 3038 PetscScalar *a, *diagA, *offdiagA; 3039 PetscInt r; 3040 PetscErrorCode ierr; 3041 3042 PetscFunctionBegin; 3043 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3044 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3045 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3046 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3047 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3048 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3049 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3050 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3051 for (r = 0; r < n; ++r) { 3052 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3053 a[r] = diagA[r]; 3054 idx[r] = cstart + diagIdx[r]; 3055 } else { 3056 a[r] = offdiagA[r]; 3057 idx[r] = cmap[offdiagIdx[r]]; 3058 } 3059 } 3060 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3061 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3062 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3063 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3064 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3065 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3071 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3072 { 3073 PetscErrorCode ierr; 3074 Mat *dummy; 3075 3076 PetscFunctionBegin; 3077 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3078 *newmat = *dummy; 3079 ierr = PetscFree(dummy);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3084 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3087 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3088 { 3089 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3090 PetscErrorCode ierr; 3091 3092 PetscFunctionBegin; 3093 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 #undef __FUNCT__ 3098 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3099 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3100 { 3101 PetscErrorCode ierr; 3102 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3103 3104 PetscFunctionBegin; 3105 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3106 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3107 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 PetscFunctionReturn(0); 3110 } 3111 3112 /* -------------------------------------------------------------------*/ 3113 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3114 MatGetRow_MPIAIJ, 3115 MatRestoreRow_MPIAIJ, 3116 MatMult_MPIAIJ, 3117 /* 4*/ MatMultAdd_MPIAIJ, 3118 MatMultTranspose_MPIAIJ, 3119 MatMultTransposeAdd_MPIAIJ, 3120 #ifdef PETSC_HAVE_PBGL 3121 MatSolve_MPIAIJ, 3122 #else 3123 0, 3124 #endif 3125 0, 3126 0, 3127 /*10*/ 0, 3128 0, 3129 0, 3130 MatSOR_MPIAIJ, 3131 MatTranspose_MPIAIJ, 3132 /*15*/ MatGetInfo_MPIAIJ, 3133 MatEqual_MPIAIJ, 3134 MatGetDiagonal_MPIAIJ, 3135 MatDiagonalScale_MPIAIJ, 3136 MatNorm_MPIAIJ, 3137 /*20*/ MatAssemblyBegin_MPIAIJ, 3138 MatAssemblyEnd_MPIAIJ, 3139 MatSetOption_MPIAIJ, 3140 MatZeroEntries_MPIAIJ, 3141 /*24*/ MatZeroRows_MPIAIJ, 3142 0, 3143 #ifdef PETSC_HAVE_PBGL 3144 0, 3145 #else 3146 0, 3147 #endif 3148 0, 3149 0, 3150 /*29*/ MatSetUp_MPIAIJ, 3151 #ifdef PETSC_HAVE_PBGL 3152 0, 3153 #else 3154 0, 3155 #endif 3156 0, 3157 0, 3158 0, 3159 /*34*/ MatDuplicate_MPIAIJ, 3160 0, 3161 0, 3162 0, 3163 0, 3164 /*39*/ MatAXPY_MPIAIJ, 3165 MatGetSubMatrices_MPIAIJ, 3166 MatIncreaseOverlap_MPIAIJ, 3167 MatGetValues_MPIAIJ, 3168 MatCopy_MPIAIJ, 3169 /*44*/ MatGetRowMax_MPIAIJ, 3170 MatScale_MPIAIJ, 3171 0, 3172 0, 3173 MatZeroRowsColumns_MPIAIJ, 3174 /*49*/ MatSetRandom_MPIAIJ, 3175 0, 3176 0, 3177 0, 3178 0, 3179 /*54*/ MatFDColoringCreate_MPIAIJ, 3180 0, 3181 MatSetUnfactored_MPIAIJ, 3182 MatPermute_MPIAIJ, 3183 0, 3184 /*59*/ MatGetSubMatrix_MPIAIJ, 3185 MatDestroy_MPIAIJ, 3186 MatView_MPIAIJ, 3187 0, 3188 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3189 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3190 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3191 0, 3192 0, 3193 0, 3194 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3195 MatGetRowMinAbs_MPIAIJ, 3196 0, 3197 MatSetColoring_MPIAIJ, 3198 #if defined(PETSC_HAVE_ADIC) 3199 MatSetValuesAdic_MPIAIJ, 3200 #else 3201 0, 3202 #endif 3203 MatSetValuesAdifor_MPIAIJ, 3204 /*75*/ MatFDColoringApply_AIJ, 3205 0, 3206 0, 3207 0, 3208 MatFindZeroDiagonals_MPIAIJ, 3209 /*80*/ 0, 3210 0, 3211 0, 3212 /*83*/ MatLoad_MPIAIJ, 3213 0, 3214 0, 3215 0, 3216 0, 3217 0, 3218 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3219 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3220 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3221 MatPtAP_MPIAIJ_MPIAIJ, 3222 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3223 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3224 0, 3225 0, 3226 0, 3227 0, 3228 /*99*/ 0, 3229 0, 3230 0, 3231 MatConjugate_MPIAIJ, 3232 0, 3233 /*104*/MatSetValuesRow_MPIAIJ, 3234 MatRealPart_MPIAIJ, 3235 MatImaginaryPart_MPIAIJ, 3236 0, 3237 0, 3238 /*109*/0, 3239 MatGetRedundantMatrix_MPIAIJ, 3240 MatGetRowMin_MPIAIJ, 3241 0, 3242 0, 3243 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3244 0, 3245 0, 3246 0, 3247 0, 3248 /*119*/0, 3249 0, 3250 0, 3251 0, 3252 MatGetMultiProcBlock_MPIAIJ, 3253 /*124*/MatFindNonzeroRows_MPIAIJ, 3254 MatGetColumnNorms_MPIAIJ, 3255 MatInvertBlockDiagonal_MPIAIJ, 3256 0, 3257 MatGetSubMatricesParallel_MPIAIJ, 3258 /*129*/0, 3259 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3260 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3261 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3262 0, 3263 /*134*/0, 3264 0, 3265 0, 3266 0, 3267 0 3268 }; 3269 3270 /* ----------------------------------------------------------------------------------------*/ 3271 3272 EXTERN_C_BEGIN 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3275 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3276 { 3277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3278 PetscErrorCode ierr; 3279 3280 PetscFunctionBegin; 3281 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3282 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 EXTERN_C_END 3286 3287 EXTERN_C_BEGIN 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3290 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3291 { 3292 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3293 PetscErrorCode ierr; 3294 3295 PetscFunctionBegin; 3296 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3297 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 EXTERN_C_BEGIN 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3305 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3306 { 3307 Mat_MPIAIJ *b; 3308 PetscErrorCode ierr; 3309 PetscInt i; 3310 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3311 3312 PetscFunctionBegin; 3313 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3314 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3315 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3316 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3317 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3318 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3319 3320 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3321 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3322 if (d_nnz) { 3323 for (i=0; i<B->rmap->n; i++) { 3324 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3325 } 3326 } 3327 if (o_nnz) { 3328 for (i=0; i<B->rmap->n; i++) { 3329 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3330 } 3331 } 3332 b = (Mat_MPIAIJ*)B->data; 3333 3334 if (!B->preallocated) { 3335 /* Explicitly create 2 MATSEQAIJ matrices. */ 3336 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3337 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3338 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3339 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3340 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3341 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3342 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3343 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3344 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3346 } 3347 3348 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3349 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3350 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3351 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3352 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3353 B->preallocated = PETSC_TRUE; 3354 PetscFunctionReturn(0); 3355 } 3356 EXTERN_C_END 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3361 { 3362 Mat mat; 3363 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3364 PetscErrorCode ierr; 3365 3366 PetscFunctionBegin; 3367 *newmat = 0; 3368 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3369 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3370 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3371 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3372 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3373 a = (Mat_MPIAIJ*)mat->data; 3374 3375 mat->factortype = matin->factortype; 3376 mat->rmap->bs = matin->rmap->bs; 3377 mat->cmap->bs = matin->cmap->bs; 3378 mat->assembled = PETSC_TRUE; 3379 mat->insertmode = NOT_SET_VALUES; 3380 mat->preallocated = PETSC_TRUE; 3381 3382 a->size = oldmat->size; 3383 a->rank = oldmat->rank; 3384 a->donotstash = oldmat->donotstash; 3385 a->roworiented = oldmat->roworiented; 3386 a->rowindices = 0; 3387 a->rowvalues = 0; 3388 a->getrowactive = PETSC_FALSE; 3389 3390 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3391 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3392 3393 if (oldmat->colmap) { 3394 #if defined (PETSC_USE_CTABLE) 3395 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3396 #else 3397 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3398 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3399 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3400 #endif 3401 } else a->colmap = 0; 3402 if (oldmat->garray) { 3403 PetscInt len; 3404 len = oldmat->B->cmap->n; 3405 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3407 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3408 } else a->garray = 0; 3409 3410 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3411 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3412 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3413 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3414 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3415 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3416 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3417 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3418 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3419 *newmat = mat; 3420 PetscFunctionReturn(0); 3421 } 3422 3423 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatLoad_MPIAIJ" 3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3428 { 3429 PetscScalar *vals,*svals; 3430 MPI_Comm comm = ((PetscObject)viewer)->comm; 3431 PetscErrorCode ierr; 3432 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3433 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3434 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3435 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3436 PetscInt cend,cstart,n,*rowners,sizesset=1; 3437 int fd; 3438 PetscInt bs = 1; 3439 3440 PetscFunctionBegin; 3441 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3442 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3443 if (!rank) { 3444 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3445 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3446 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3447 } 3448 3449 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3450 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3451 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3452 3453 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3454 3455 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3456 M = header[1]; N = header[2]; 3457 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3458 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3459 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3460 3461 /* If global sizes are set, check if they are consistent with that given in the file */ 3462 if (sizesset) { 3463 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3464 } 3465 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3466 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3467 3468 /* determine ownership of all (block) rows */ 3469 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3470 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3471 else m = newMat->rmap->n; /* Set by user */ 3472 3473 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3474 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3475 3476 /* First process needs enough room for process with most rows */ 3477 if (!rank) { 3478 mmax = rowners[1]; 3479 for (i=2; i<=size; i++) { 3480 mmax = PetscMax(mmax, rowners[i]); 3481 } 3482 } else mmax = m; 3483 3484 rowners[0] = 0; 3485 for (i=2; i<=size; i++) { 3486 rowners[i] += rowners[i-1]; 3487 } 3488 rstart = rowners[rank]; 3489 rend = rowners[rank+1]; 3490 3491 /* distribute row lengths to all processors */ 3492 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3493 if (!rank) { 3494 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3495 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3496 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3497 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3498 for (j=0; j<m; j++) { 3499 procsnz[0] += ourlens[j]; 3500 } 3501 for (i=1; i<size; i++) { 3502 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3503 /* calculate the number of nonzeros on each processor */ 3504 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3505 procsnz[i] += rowlengths[j]; 3506 } 3507 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3510 } else { 3511 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3512 } 3513 3514 if (!rank) { 3515 /* determine max buffer needed and allocate it */ 3516 maxnz = 0; 3517 for (i=0; i<size; i++) { 3518 maxnz = PetscMax(maxnz,procsnz[i]); 3519 } 3520 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3521 3522 /* read in my part of the matrix column indices */ 3523 nz = procsnz[0]; 3524 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3525 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3526 3527 /* read in every one elses and ship off */ 3528 for (i=1; i<size; i++) { 3529 nz = procsnz[i]; 3530 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3531 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3532 } 3533 ierr = PetscFree(cols);CHKERRQ(ierr); 3534 } else { 3535 /* determine buffer space needed for message */ 3536 nz = 0; 3537 for (i=0; i<m; i++) { 3538 nz += ourlens[i]; 3539 } 3540 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3541 3542 /* receive message of column indices*/ 3543 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3544 } 3545 3546 /* determine column ownership if matrix is not square */ 3547 if (N != M) { 3548 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3549 else n = newMat->cmap->n; 3550 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3551 cstart = cend - n; 3552 } else { 3553 cstart = rstart; 3554 cend = rend; 3555 n = cend - cstart; 3556 } 3557 3558 /* loop over local rows, determining number of off diagonal entries */ 3559 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3560 jj = 0; 3561 for (i=0; i<m; i++) { 3562 for (j=0; j<ourlens[i]; j++) { 3563 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3564 jj++; 3565 } 3566 } 3567 3568 for (i=0; i<m; i++) { 3569 ourlens[i] -= offlens[i]; 3570 } 3571 if (!sizesset) { 3572 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3573 } 3574 3575 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3576 3577 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3578 3579 for (i=0; i<m; i++) { 3580 ourlens[i] += offlens[i]; 3581 } 3582 3583 if (!rank) { 3584 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3585 3586 /* read in my part of the matrix numerical values */ 3587 nz = procsnz[0]; 3588 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3589 3590 /* insert into matrix */ 3591 jj = rstart; 3592 smycols = mycols; 3593 svals = vals; 3594 for (i=0; i<m; i++) { 3595 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3596 smycols += ourlens[i]; 3597 svals += ourlens[i]; 3598 jj++; 3599 } 3600 3601 /* read in other processors and ship out */ 3602 for (i=1; i<size; i++) { 3603 nz = procsnz[i]; 3604 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3605 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3606 } 3607 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3608 } else { 3609 /* receive numeric values */ 3610 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3611 3612 /* receive message of values*/ 3613 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3614 3615 /* insert into matrix */ 3616 jj = rstart; 3617 smycols = mycols; 3618 svals = vals; 3619 for (i=0; i<m; i++) { 3620 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3621 smycols += ourlens[i]; 3622 svals += ourlens[i]; 3623 jj++; 3624 } 3625 } 3626 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3627 ierr = PetscFree(vals);CHKERRQ(ierr); 3628 ierr = PetscFree(mycols);CHKERRQ(ierr); 3629 ierr = PetscFree(rowners);CHKERRQ(ierr); 3630 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 3633 PetscFunctionReturn(0); 3634 } 3635 3636 #undef __FUNCT__ 3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3639 { 3640 PetscErrorCode ierr; 3641 IS iscol_local; 3642 PetscInt csize; 3643 3644 PetscFunctionBegin; 3645 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3646 if (call == MAT_REUSE_MATRIX) { 3647 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3648 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3649 } else { 3650 PetscInt cbs; 3651 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3652 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3653 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3654 } 3655 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3656 if (call == MAT_INITIAL_MATRIX) { 3657 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3658 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3659 } 3660 PetscFunctionReturn(0); 3661 } 3662 3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3664 #undef __FUNCT__ 3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3666 /* 3667 Not great since it makes two copies of the submatrix, first an SeqAIJ 3668 in local and then by concatenating the local matrices the end result. 3669 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3670 3671 Note: This requires a sequential iscol with all indices. 3672 */ 3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3674 { 3675 PetscErrorCode ierr; 3676 PetscMPIInt rank,size; 3677 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3678 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3679 PetscBool allcolumns, colflag; 3680 Mat M,Mreuse; 3681 MatScalar *vwork,*aa; 3682 MPI_Comm comm = ((PetscObject)mat)->comm; 3683 Mat_SeqAIJ *aij; 3684 3685 3686 PetscFunctionBegin; 3687 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3688 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3689 3690 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3691 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3692 if (colflag && ncol == mat->cmap->N){ 3693 allcolumns = PETSC_TRUE; 3694 } else { 3695 allcolumns = PETSC_FALSE; 3696 } 3697 if (call == MAT_REUSE_MATRIX) { 3698 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3699 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3700 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3701 } else { 3702 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3703 } 3704 3705 /* 3706 m - number of local rows 3707 n - number of columns (same on all processors) 3708 rstart - first row in new global matrix generated 3709 */ 3710 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3711 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3712 if (call == MAT_INITIAL_MATRIX) { 3713 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3714 ii = aij->i; 3715 jj = aij->j; 3716 3717 /* 3718 Determine the number of non-zeros in the diagonal and off-diagonal 3719 portions of the matrix in order to do correct preallocation 3720 */ 3721 3722 /* first get start and end of "diagonal" columns */ 3723 if (csize == PETSC_DECIDE) { 3724 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3725 if (mglobal == n) { /* square matrix */ 3726 nlocal = m; 3727 } else { 3728 nlocal = n/size + ((n % size) > rank); 3729 } 3730 } else { 3731 nlocal = csize; 3732 } 3733 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3734 rstart = rend - nlocal; 3735 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3736 3737 /* next, compute all the lengths */ 3738 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3739 olens = dlens + m; 3740 for (i=0; i<m; i++) { 3741 jend = ii[i+1] - ii[i]; 3742 olen = 0; 3743 dlen = 0; 3744 for (j=0; j<jend; j++) { 3745 if (*jj < rstart || *jj >= rend) olen++; 3746 else dlen++; 3747 jj++; 3748 } 3749 olens[i] = olen; 3750 dlens[i] = dlen; 3751 } 3752 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3753 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3754 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3755 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3756 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3757 ierr = PetscFree(dlens);CHKERRQ(ierr); 3758 } else { 3759 PetscInt ml,nl; 3760 3761 M = *newmat; 3762 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3763 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3764 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3765 /* 3766 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3767 rather than the slower MatSetValues(). 3768 */ 3769 M->was_assembled = PETSC_TRUE; 3770 M->assembled = PETSC_FALSE; 3771 } 3772 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3773 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3774 ii = aij->i; 3775 jj = aij->j; 3776 aa = aij->a; 3777 for (i=0; i<m; i++) { 3778 row = rstart + i; 3779 nz = ii[i+1] - ii[i]; 3780 cwork = jj; jj += nz; 3781 vwork = aa; aa += nz; 3782 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3783 } 3784 3785 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3786 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 *newmat = M; 3788 3789 /* save submatrix used in processor for next request */ 3790 if (call == MAT_INITIAL_MATRIX) { 3791 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3792 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3793 } 3794 3795 PetscFunctionReturn(0); 3796 } 3797 3798 EXTERN_C_BEGIN 3799 #undef __FUNCT__ 3800 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3801 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3802 { 3803 PetscInt m,cstart, cend,j,nnz,i,d; 3804 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3805 const PetscInt *JJ; 3806 PetscScalar *values; 3807 PetscErrorCode ierr; 3808 3809 PetscFunctionBegin; 3810 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3811 3812 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3813 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3814 m = B->rmap->n; 3815 cstart = B->cmap->rstart; 3816 cend = B->cmap->rend; 3817 rstart = B->rmap->rstart; 3818 3819 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3820 3821 #if defined(PETSC_USE_DEBUGGING) 3822 for (i=0; i<m; i++) { 3823 nnz = Ii[i+1]- Ii[i]; 3824 JJ = J + Ii[i]; 3825 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3826 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3827 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3828 } 3829 #endif 3830 3831 for (i=0; i<m; i++) { 3832 nnz = Ii[i+1]- Ii[i]; 3833 JJ = J + Ii[i]; 3834 nnz_max = PetscMax(nnz_max,nnz); 3835 d = 0; 3836 for (j=0; j<nnz; j++) { 3837 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3838 } 3839 d_nnz[i] = d; 3840 o_nnz[i] = nnz - d; 3841 } 3842 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3843 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3844 3845 if (v) values = (PetscScalar*)v; 3846 else { 3847 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3848 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3849 } 3850 3851 for (i=0; i<m; i++) { 3852 ii = i + rstart; 3853 nnz = Ii[i+1]- Ii[i]; 3854 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3855 } 3856 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3858 3859 if (!v) { 3860 ierr = PetscFree(values);CHKERRQ(ierr); 3861 } 3862 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3863 PetscFunctionReturn(0); 3864 } 3865 EXTERN_C_END 3866 3867 #undef __FUNCT__ 3868 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3869 /*@ 3870 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3871 (the default parallel PETSc format). 3872 3873 Collective on MPI_Comm 3874 3875 Input Parameters: 3876 + B - the matrix 3877 . i - the indices into j for the start of each local row (starts with zero) 3878 . j - the column indices for each local row (starts with zero) 3879 - v - optional values in the matrix 3880 3881 Level: developer 3882 3883 Notes: 3884 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3885 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3886 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3887 3888 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3889 3890 The format which is used for the sparse matrix input, is equivalent to a 3891 row-major ordering.. i.e for the following matrix, the input data expected is 3892 as shown: 3893 3894 1 0 0 3895 2 0 3 P0 3896 ------- 3897 4 5 6 P1 3898 3899 Process0 [P0]: rows_owned=[0,1] 3900 i = {0,1,3} [size = nrow+1 = 2+1] 3901 j = {0,0,2} [size = nz = 6] 3902 v = {1,2,3} [size = nz = 6] 3903 3904 Process1 [P1]: rows_owned=[2] 3905 i = {0,3} [size = nrow+1 = 1+1] 3906 j = {0,1,2} [size = nz = 6] 3907 v = {4,5,6} [size = nz = 6] 3908 3909 .keywords: matrix, aij, compressed row, sparse, parallel 3910 3911 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3912 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3913 @*/ 3914 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3915 { 3916 PetscErrorCode ierr; 3917 3918 PetscFunctionBegin; 3919 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3920 PetscFunctionReturn(0); 3921 } 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3925 /*@C 3926 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3927 (the default parallel PETSc format). For good matrix assembly performance 3928 the user should preallocate the matrix storage by setting the parameters 3929 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3930 performance can be increased by more than a factor of 50. 3931 3932 Collective on MPI_Comm 3933 3934 Input Parameters: 3935 + A - the matrix 3936 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3937 (same value is used for all local rows) 3938 . d_nnz - array containing the number of nonzeros in the various rows of the 3939 DIAGONAL portion of the local submatrix (possibly different for each row) 3940 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3941 The size of this array is equal to the number of local rows, i.e 'm'. 3942 For matrices that will be factored, you must leave room for (and set) 3943 the diagonal entry even if it is zero. 3944 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3945 submatrix (same value is used for all local rows). 3946 - o_nnz - array containing the number of nonzeros in the various rows of the 3947 OFF-DIAGONAL portion of the local submatrix (possibly different for 3948 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3949 structure. The size of this array is equal to the number 3950 of local rows, i.e 'm'. 3951 3952 If the *_nnz parameter is given then the *_nz parameter is ignored 3953 3954 The AIJ format (also called the Yale sparse matrix format or 3955 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3956 storage. The stored row and column indices begin with zero. 3957 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3958 3959 The parallel matrix is partitioned such that the first m0 rows belong to 3960 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3961 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3962 3963 The DIAGONAL portion of the local submatrix of a processor can be defined 3964 as the submatrix which is obtained by extraction the part corresponding to 3965 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3966 first row that belongs to the processor, r2 is the last row belonging to 3967 the this processor, and c1-c2 is range of indices of the local part of a 3968 vector suitable for applying the matrix to. This is an mxn matrix. In the 3969 common case of a square matrix, the row and column ranges are the same and 3970 the DIAGONAL part is also square. The remaining portion of the local 3971 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3972 3973 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3974 3975 You can call MatGetInfo() to get information on how effective the preallocation was; 3976 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3977 You can also run with the option -info and look for messages with the string 3978 malloc in them to see if additional memory allocation was needed. 3979 3980 Example usage: 3981 3982 Consider the following 8x8 matrix with 34 non-zero values, that is 3983 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3984 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3985 as follows: 3986 3987 .vb 3988 1 2 0 | 0 3 0 | 0 4 3989 Proc0 0 5 6 | 7 0 0 | 8 0 3990 9 0 10 | 11 0 0 | 12 0 3991 ------------------------------------- 3992 13 0 14 | 15 16 17 | 0 0 3993 Proc1 0 18 0 | 19 20 21 | 0 0 3994 0 0 0 | 22 23 0 | 24 0 3995 ------------------------------------- 3996 Proc2 25 26 27 | 0 0 28 | 29 0 3997 30 0 0 | 31 32 33 | 0 34 3998 .ve 3999 4000 This can be represented as a collection of submatrices as: 4001 4002 .vb 4003 A B C 4004 D E F 4005 G H I 4006 .ve 4007 4008 Where the submatrices A,B,C are owned by proc0, D,E,F are 4009 owned by proc1, G,H,I are owned by proc2. 4010 4011 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4012 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4013 The 'M','N' parameters are 8,8, and have the same values on all procs. 4014 4015 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4016 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4017 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4018 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4019 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4020 matrix, ans [DF] as another SeqAIJ matrix. 4021 4022 When d_nz, o_nz parameters are specified, d_nz storage elements are 4023 allocated for every row of the local diagonal submatrix, and o_nz 4024 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4025 One way to choose d_nz and o_nz is to use the max nonzerors per local 4026 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4027 In this case, the values of d_nz,o_nz are: 4028 .vb 4029 proc0 : dnz = 2, o_nz = 2 4030 proc1 : dnz = 3, o_nz = 2 4031 proc2 : dnz = 1, o_nz = 4 4032 .ve 4033 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4034 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4035 for proc3. i.e we are using 12+15+10=37 storage locations to store 4036 34 values. 4037 4038 When d_nnz, o_nnz parameters are specified, the storage is specified 4039 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4040 In the above case the values for d_nnz,o_nnz are: 4041 .vb 4042 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4043 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4044 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4045 .ve 4046 Here the space allocated is sum of all the above values i.e 34, and 4047 hence pre-allocation is perfect. 4048 4049 Level: intermediate 4050 4051 .keywords: matrix, aij, compressed row, sparse, parallel 4052 4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4054 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4055 @*/ 4056 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4057 { 4058 PetscErrorCode ierr; 4059 4060 PetscFunctionBegin; 4061 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4062 PetscValidType(B,1); 4063 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4064 PetscFunctionReturn(0); 4065 } 4066 4067 #undef __FUNCT__ 4068 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4069 /*@ 4070 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4071 CSR format the local rows. 4072 4073 Collective on MPI_Comm 4074 4075 Input Parameters: 4076 + comm - MPI communicator 4077 . m - number of local rows (Cannot be PETSC_DECIDE) 4078 . n - This value should be the same as the local size used in creating the 4079 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4080 calculated if N is given) For square matrices n is almost always m. 4081 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4082 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4083 . i - row indices 4084 . j - column indices 4085 - a - matrix values 4086 4087 Output Parameter: 4088 . mat - the matrix 4089 4090 Level: intermediate 4091 4092 Notes: 4093 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4094 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4095 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4096 4097 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4098 4099 The format which is used for the sparse matrix input, is equivalent to a 4100 row-major ordering.. i.e for the following matrix, the input data expected is 4101 as shown: 4102 4103 1 0 0 4104 2 0 3 P0 4105 ------- 4106 4 5 6 P1 4107 4108 Process0 [P0]: rows_owned=[0,1] 4109 i = {0,1,3} [size = nrow+1 = 2+1] 4110 j = {0,0,2} [size = nz = 6] 4111 v = {1,2,3} [size = nz = 6] 4112 4113 Process1 [P1]: rows_owned=[2] 4114 i = {0,3} [size = nrow+1 = 1+1] 4115 j = {0,1,2} [size = nz = 6] 4116 v = {4,5,6} [size = nz = 6] 4117 4118 .keywords: matrix, aij, compressed row, sparse, parallel 4119 4120 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4121 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4122 @*/ 4123 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4124 { 4125 PetscErrorCode ierr; 4126 4127 PetscFunctionBegin; 4128 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4129 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4130 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4131 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4132 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4133 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4134 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4135 PetscFunctionReturn(0); 4136 } 4137 4138 #undef __FUNCT__ 4139 #define __FUNCT__ "MatCreateAIJ" 4140 /*@C 4141 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4142 (the default parallel PETSc format). For good matrix assembly performance 4143 the user should preallocate the matrix storage by setting the parameters 4144 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4145 performance can be increased by more than a factor of 50. 4146 4147 Collective on MPI_Comm 4148 4149 Input Parameters: 4150 + comm - MPI communicator 4151 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4152 This value should be the same as the local size used in creating the 4153 y vector for the matrix-vector product y = Ax. 4154 . n - This value should be the same as the local size used in creating the 4155 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4156 calculated if N is given) For square matrices n is almost always m. 4157 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4158 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4159 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4160 (same value is used for all local rows) 4161 . d_nnz - array containing the number of nonzeros in the various rows of the 4162 DIAGONAL portion of the local submatrix (possibly different for each row) 4163 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4164 The size of this array is equal to the number of local rows, i.e 'm'. 4165 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4166 submatrix (same value is used for all local rows). 4167 - o_nnz - array containing the number of nonzeros in the various rows of the 4168 OFF-DIAGONAL portion of the local submatrix (possibly different for 4169 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4170 structure. The size of this array is equal to the number 4171 of local rows, i.e 'm'. 4172 4173 Output Parameter: 4174 . A - the matrix 4175 4176 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4177 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4178 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4179 4180 Notes: 4181 If the *_nnz parameter is given then the *_nz parameter is ignored 4182 4183 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4184 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4185 storage requirements for this matrix. 4186 4187 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4188 processor than it must be used on all processors that share the object for 4189 that argument. 4190 4191 The user MUST specify either the local or global matrix dimensions 4192 (possibly both). 4193 4194 The parallel matrix is partitioned across processors such that the 4195 first m0 rows belong to process 0, the next m1 rows belong to 4196 process 1, the next m2 rows belong to process 2 etc.. where 4197 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4198 values corresponding to [m x N] submatrix. 4199 4200 The columns are logically partitioned with the n0 columns belonging 4201 to 0th partition, the next n1 columns belonging to the next 4202 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4203 4204 The DIAGONAL portion of the local submatrix on any given processor 4205 is the submatrix corresponding to the rows and columns m,n 4206 corresponding to the given processor. i.e diagonal matrix on 4207 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4208 etc. The remaining portion of the local submatrix [m x (N-n)] 4209 constitute the OFF-DIAGONAL portion. The example below better 4210 illustrates this concept. 4211 4212 For a square global matrix we define each processor's diagonal portion 4213 to be its local rows and the corresponding columns (a square submatrix); 4214 each processor's off-diagonal portion encompasses the remainder of the 4215 local matrix (a rectangular submatrix). 4216 4217 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4218 4219 When calling this routine with a single process communicator, a matrix of 4220 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4221 type of communicator, use the construction mechanism: 4222 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4223 4224 By default, this format uses inodes (identical nodes) when possible. 4225 We search for consecutive rows with the same nonzero structure, thereby 4226 reusing matrix information to achieve increased efficiency. 4227 4228 Options Database Keys: 4229 + -mat_no_inode - Do not use inodes 4230 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4231 - -mat_aij_oneindex - Internally use indexing starting at 1 4232 rather than 0. Note that when calling MatSetValues(), 4233 the user still MUST index entries starting at 0! 4234 4235 4236 Example usage: 4237 4238 Consider the following 8x8 matrix with 34 non-zero values, that is 4239 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4240 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4241 as follows: 4242 4243 .vb 4244 1 2 0 | 0 3 0 | 0 4 4245 Proc0 0 5 6 | 7 0 0 | 8 0 4246 9 0 10 | 11 0 0 | 12 0 4247 ------------------------------------- 4248 13 0 14 | 15 16 17 | 0 0 4249 Proc1 0 18 0 | 19 20 21 | 0 0 4250 0 0 0 | 22 23 0 | 24 0 4251 ------------------------------------- 4252 Proc2 25 26 27 | 0 0 28 | 29 0 4253 30 0 0 | 31 32 33 | 0 34 4254 .ve 4255 4256 This can be represented as a collection of submatrices as: 4257 4258 .vb 4259 A B C 4260 D E F 4261 G H I 4262 .ve 4263 4264 Where the submatrices A,B,C are owned by proc0, D,E,F are 4265 owned by proc1, G,H,I are owned by proc2. 4266 4267 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4268 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4269 The 'M','N' parameters are 8,8, and have the same values on all procs. 4270 4271 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4272 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4273 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4274 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4275 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4276 matrix, ans [DF] as another SeqAIJ matrix. 4277 4278 When d_nz, o_nz parameters are specified, d_nz storage elements are 4279 allocated for every row of the local diagonal submatrix, and o_nz 4280 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4281 One way to choose d_nz and o_nz is to use the max nonzerors per local 4282 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4283 In this case, the values of d_nz,o_nz are: 4284 .vb 4285 proc0 : dnz = 2, o_nz = 2 4286 proc1 : dnz = 3, o_nz = 2 4287 proc2 : dnz = 1, o_nz = 4 4288 .ve 4289 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4290 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4291 for proc3. i.e we are using 12+15+10=37 storage locations to store 4292 34 values. 4293 4294 When d_nnz, o_nnz parameters are specified, the storage is specified 4295 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4296 In the above case the values for d_nnz,o_nnz are: 4297 .vb 4298 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4299 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4300 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4301 .ve 4302 Here the space allocated is sum of all the above values i.e 34, and 4303 hence pre-allocation is perfect. 4304 4305 Level: intermediate 4306 4307 .keywords: matrix, aij, compressed row, sparse, parallel 4308 4309 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4310 MPIAIJ, MatCreateMPIAIJWithArrays() 4311 @*/ 4312 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4313 { 4314 PetscErrorCode ierr; 4315 PetscMPIInt size; 4316 4317 PetscFunctionBegin; 4318 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4319 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4321 if (size > 1) { 4322 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4323 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4324 } else { 4325 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4326 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4327 } 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4333 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4334 { 4335 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4336 4337 PetscFunctionBegin; 4338 *Ad = a->A; 4339 *Ao = a->B; 4340 *colmap = a->garray; 4341 PetscFunctionReturn(0); 4342 } 4343 4344 #undef __FUNCT__ 4345 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4346 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4347 { 4348 PetscErrorCode ierr; 4349 PetscInt i; 4350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4351 4352 PetscFunctionBegin; 4353 if (coloring->ctype == IS_COLORING_GLOBAL) { 4354 ISColoringValue *allcolors,*colors; 4355 ISColoring ocoloring; 4356 4357 /* set coloring for diagonal portion */ 4358 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4359 4360 /* set coloring for off-diagonal portion */ 4361 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4362 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4363 for (i=0; i<a->B->cmap->n; i++) { 4364 colors[i] = allcolors[a->garray[i]]; 4365 } 4366 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4367 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4368 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4369 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4370 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4371 ISColoringValue *colors; 4372 PetscInt *larray; 4373 ISColoring ocoloring; 4374 4375 /* set coloring for diagonal portion */ 4376 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4377 for (i=0; i<a->A->cmap->n; i++) { 4378 larray[i] = i + A->cmap->rstart; 4379 } 4380 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4381 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4382 for (i=0; i<a->A->cmap->n; i++) { 4383 colors[i] = coloring->colors[larray[i]]; 4384 } 4385 ierr = PetscFree(larray);CHKERRQ(ierr); 4386 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4387 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4388 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4389 4390 /* set coloring for off-diagonal portion */ 4391 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4392 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4393 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4394 for (i=0; i<a->B->cmap->n; i++) { 4395 colors[i] = coloring->colors[larray[i]]; 4396 } 4397 ierr = PetscFree(larray);CHKERRQ(ierr); 4398 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4399 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4400 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4401 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4402 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #if defined(PETSC_HAVE_ADIC) 4407 #undef __FUNCT__ 4408 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4409 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4410 { 4411 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4412 PetscErrorCode ierr; 4413 4414 PetscFunctionBegin; 4415 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4416 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 #endif 4420 4421 #undef __FUNCT__ 4422 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4423 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4424 { 4425 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4426 PetscErrorCode ierr; 4427 4428 PetscFunctionBegin; 4429 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4430 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4431 PetscFunctionReturn(0); 4432 } 4433 4434 #undef __FUNCT__ 4435 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4436 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4437 { 4438 PetscErrorCode ierr; 4439 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4440 PetscInt *indx; 4441 4442 PetscFunctionBegin; 4443 /* This routine will ONLY return MPIAIJ type matrix */ 4444 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4445 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4446 if (n == PETSC_DECIDE){ 4447 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4448 } 4449 /* Check sum(n) = N */ 4450 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4451 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4452 4453 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4454 rstart -= m; 4455 4456 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4457 for (i=0;i<m;i++) { 4458 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4459 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4460 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4461 } 4462 4463 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4464 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4465 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4466 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4467 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4468 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4474 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4475 { 4476 PetscErrorCode ierr; 4477 PetscInt m,N,i,rstart,nnz,Ii; 4478 PetscInt *indx; 4479 PetscScalar *values; 4480 4481 PetscFunctionBegin; 4482 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4483 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4484 for (i=0;i<m;i++) { 4485 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4486 Ii = i + rstart; 4487 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4488 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4489 } 4490 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4491 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4497 /*@ 4498 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4499 matrices from each processor 4500 4501 Collective on MPI_Comm 4502 4503 Input Parameters: 4504 + comm - the communicators the parallel matrix will live on 4505 . inmat - the input sequential matrices 4506 . n - number of local columns (or PETSC_DECIDE) 4507 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4508 4509 Output Parameter: 4510 . outmat - the parallel matrix generated 4511 4512 Level: advanced 4513 4514 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4515 4516 @*/ 4517 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4518 { 4519 PetscErrorCode ierr; 4520 4521 PetscFunctionBegin; 4522 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4523 if (scall == MAT_INITIAL_MATRIX){ 4524 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4525 } 4526 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4527 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4528 PetscFunctionReturn(0); 4529 } 4530 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatFileSplit" 4533 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4534 { 4535 PetscErrorCode ierr; 4536 PetscMPIInt rank; 4537 PetscInt m,N,i,rstart,nnz; 4538 size_t len; 4539 const PetscInt *indx; 4540 PetscViewer out; 4541 char *name; 4542 Mat B; 4543 const PetscScalar *values; 4544 4545 PetscFunctionBegin; 4546 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4547 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4548 /* Should this be the type of the diagonal block of A? */ 4549 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4550 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4551 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4552 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4553 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4554 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4555 for (i=0;i<m;i++) { 4556 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4557 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4558 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4559 } 4560 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4561 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4562 4563 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4564 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4565 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4566 sprintf(name,"%s.%d",outfile,rank); 4567 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4568 ierr = PetscFree(name); 4569 ierr = MatView(B,out);CHKERRQ(ierr); 4570 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4571 ierr = MatDestroy(&B);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4576 #undef __FUNCT__ 4577 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4578 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4579 { 4580 PetscErrorCode ierr; 4581 Mat_Merge_SeqsToMPI *merge; 4582 PetscContainer container; 4583 4584 PetscFunctionBegin; 4585 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4586 if (container) { 4587 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4588 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4589 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4590 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4591 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4594 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4599 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4600 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4601 ierr = PetscFree(merge);CHKERRQ(ierr); 4602 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4603 } 4604 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4605 PetscFunctionReturn(0); 4606 } 4607 4608 #include <../src/mat/utils/freespace.h> 4609 #include <petscbt.h> 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4613 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4614 { 4615 PetscErrorCode ierr; 4616 MPI_Comm comm=((PetscObject)mpimat)->comm; 4617 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4618 PetscMPIInt size,rank,taga,*len_s; 4619 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4620 PetscInt proc,m; 4621 PetscInt **buf_ri,**buf_rj; 4622 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4623 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4624 MPI_Request *s_waits,*r_waits; 4625 MPI_Status *status; 4626 MatScalar *aa=a->a; 4627 MatScalar **abuf_r,*ba_i; 4628 Mat_Merge_SeqsToMPI *merge; 4629 PetscContainer container; 4630 4631 PetscFunctionBegin; 4632 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4633 4634 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4635 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4636 4637 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4638 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4639 4640 bi = merge->bi; 4641 bj = merge->bj; 4642 buf_ri = merge->buf_ri; 4643 buf_rj = merge->buf_rj; 4644 4645 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4646 owners = merge->rowmap->range; 4647 len_s = merge->len_s; 4648 4649 /* send and recv matrix values */ 4650 /*-----------------------------*/ 4651 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4652 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4653 4654 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4655 for (proc=0,k=0; proc<size; proc++){ 4656 if (!len_s[proc]) continue; 4657 i = owners[proc]; 4658 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4659 k++; 4660 } 4661 4662 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4663 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4664 ierr = PetscFree(status);CHKERRQ(ierr); 4665 4666 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4667 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4668 4669 /* insert mat values of mpimat */ 4670 /*----------------------------*/ 4671 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4672 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4673 4674 for (k=0; k<merge->nrecv; k++){ 4675 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4676 nrows = *(buf_ri_k[k]); 4677 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4678 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4679 } 4680 4681 /* set values of ba */ 4682 m = merge->rowmap->n; 4683 for (i=0; i<m; i++) { 4684 arow = owners[rank] + i; 4685 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4686 bnzi = bi[i+1] - bi[i]; 4687 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4688 4689 /* add local non-zero vals of this proc's seqmat into ba */ 4690 anzi = ai[arow+1] - ai[arow]; 4691 aj = a->j + ai[arow]; 4692 aa = a->a + ai[arow]; 4693 nextaj = 0; 4694 for (j=0; nextaj<anzi; j++){ 4695 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4696 ba_i[j] += aa[nextaj++]; 4697 } 4698 } 4699 4700 /* add received vals into ba */ 4701 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4702 /* i-th row */ 4703 if (i == *nextrow[k]) { 4704 anzi = *(nextai[k]+1) - *nextai[k]; 4705 aj = buf_rj[k] + *(nextai[k]); 4706 aa = abuf_r[k] + *(nextai[k]); 4707 nextaj = 0; 4708 for (j=0; nextaj<anzi; j++){ 4709 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4710 ba_i[j] += aa[nextaj++]; 4711 } 4712 } 4713 nextrow[k]++; nextai[k]++; 4714 } 4715 } 4716 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4717 } 4718 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4719 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4720 4721 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4722 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4723 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4724 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4725 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4726 PetscFunctionReturn(0); 4727 } 4728 4729 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4730 4731 #undef __FUNCT__ 4732 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4733 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4734 { 4735 PetscErrorCode ierr; 4736 Mat B_mpi; 4737 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4738 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4739 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4740 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4741 PetscInt len,proc,*dnz,*onz,bs,cbs; 4742 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4743 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4744 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4745 MPI_Status *status; 4746 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4747 PetscBT lnkbt; 4748 Mat_Merge_SeqsToMPI *merge; 4749 PetscContainer container; 4750 4751 PetscFunctionBegin; 4752 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4753 4754 /* make sure it is a PETSc comm */ 4755 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4756 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4757 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4758 4759 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4760 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4761 4762 /* determine row ownership */ 4763 /*---------------------------------------------------------*/ 4764 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4765 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4766 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4767 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4768 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4769 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4770 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4771 4772 m = merge->rowmap->n; 4773 M = merge->rowmap->N; 4774 owners = merge->rowmap->range; 4775 4776 /* determine the number of messages to send, their lengths */ 4777 /*---------------------------------------------------------*/ 4778 len_s = merge->len_s; 4779 4780 len = 0; /* length of buf_si[] */ 4781 merge->nsend = 0; 4782 for (proc=0; proc<size; proc++){ 4783 len_si[proc] = 0; 4784 if (proc == rank){ 4785 len_s[proc] = 0; 4786 } else { 4787 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4788 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4789 } 4790 if (len_s[proc]) { 4791 merge->nsend++; 4792 nrows = 0; 4793 for (i=owners[proc]; i<owners[proc+1]; i++){ 4794 if (ai[i+1] > ai[i]) nrows++; 4795 } 4796 len_si[proc] = 2*(nrows+1); 4797 len += len_si[proc]; 4798 } 4799 } 4800 4801 /* determine the number and length of messages to receive for ij-structure */ 4802 /*-------------------------------------------------------------------------*/ 4803 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4804 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4805 4806 /* post the Irecv of j-structure */ 4807 /*-------------------------------*/ 4808 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4809 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4810 4811 /* post the Isend of j-structure */ 4812 /*--------------------------------*/ 4813 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4814 4815 for (proc=0, k=0; proc<size; proc++){ 4816 if (!len_s[proc]) continue; 4817 i = owners[proc]; 4818 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4819 k++; 4820 } 4821 4822 /* receives and sends of j-structure are complete */ 4823 /*------------------------------------------------*/ 4824 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4825 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4826 4827 /* send and recv i-structure */ 4828 /*---------------------------*/ 4829 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4830 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4831 4832 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4833 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4834 for (proc=0,k=0; proc<size; proc++){ 4835 if (!len_s[proc]) continue; 4836 /* form outgoing message for i-structure: 4837 buf_si[0]: nrows to be sent 4838 [1:nrows]: row index (global) 4839 [nrows+1:2*nrows+1]: i-structure index 4840 */ 4841 /*-------------------------------------------*/ 4842 nrows = len_si[proc]/2 - 1; 4843 buf_si_i = buf_si + nrows+1; 4844 buf_si[0] = nrows; 4845 buf_si_i[0] = 0; 4846 nrows = 0; 4847 for (i=owners[proc]; i<owners[proc+1]; i++){ 4848 anzi = ai[i+1] - ai[i]; 4849 if (anzi) { 4850 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4851 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4852 nrows++; 4853 } 4854 } 4855 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4856 k++; 4857 buf_si += len_si[proc]; 4858 } 4859 4860 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4861 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4862 4863 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4864 for (i=0; i<merge->nrecv; i++){ 4865 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4866 } 4867 4868 ierr = PetscFree(len_si);CHKERRQ(ierr); 4869 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4870 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4871 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4872 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4873 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4874 ierr = PetscFree(status);CHKERRQ(ierr); 4875 4876 /* compute a local seq matrix in each processor */ 4877 /*----------------------------------------------*/ 4878 /* allocate bi array and free space for accumulating nonzero column info */ 4879 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4880 bi[0] = 0; 4881 4882 /* create and initialize a linked list */ 4883 nlnk = N+1; 4884 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 4886 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4887 len = 0; 4888 len = ai[owners[rank+1]] - ai[owners[rank]]; 4889 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4890 current_space = free_space; 4891 4892 /* determine symbolic info for each local row */ 4893 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4894 4895 for (k=0; k<merge->nrecv; k++){ 4896 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4897 nrows = *buf_ri_k[k]; 4898 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4899 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4900 } 4901 4902 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4903 len = 0; 4904 for (i=0;i<m;i++) { 4905 bnzi = 0; 4906 /* add local non-zero cols of this proc's seqmat into lnk */ 4907 arow = owners[rank] + i; 4908 anzi = ai[arow+1] - ai[arow]; 4909 aj = a->j + ai[arow]; 4910 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4911 bnzi += nlnk; 4912 /* add received col data into lnk */ 4913 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4914 if (i == *nextrow[k]) { /* i-th row */ 4915 anzi = *(nextai[k]+1) - *nextai[k]; 4916 aj = buf_rj[k] + *nextai[k]; 4917 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4918 bnzi += nlnk; 4919 nextrow[k]++; nextai[k]++; 4920 } 4921 } 4922 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4923 4924 /* if free space is not available, make more free space */ 4925 if (current_space->local_remaining<bnzi) { 4926 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4927 nspacedouble++; 4928 } 4929 /* copy data into free space, then initialize lnk */ 4930 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4931 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4932 4933 current_space->array += bnzi; 4934 current_space->local_used += bnzi; 4935 current_space->local_remaining -= bnzi; 4936 4937 bi[i+1] = bi[i] + bnzi; 4938 } 4939 4940 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4941 4942 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4943 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4944 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4945 4946 /* create symbolic parallel matrix B_mpi */ 4947 /*---------------------------------------*/ 4948 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4949 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4950 if (n==PETSC_DECIDE) { 4951 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4952 } else { 4953 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4954 } 4955 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4956 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4957 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4958 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4959 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4960 4961 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4962 B_mpi->assembled = PETSC_FALSE; 4963 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4964 merge->bi = bi; 4965 merge->bj = bj; 4966 merge->buf_ri = buf_ri; 4967 merge->buf_rj = buf_rj; 4968 merge->coi = PETSC_NULL; 4969 merge->coj = PETSC_NULL; 4970 merge->owners_co = PETSC_NULL; 4971 4972 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4973 4974 /* attach the supporting struct to B_mpi for reuse */ 4975 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4976 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4977 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4978 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4979 *mpimat = B_mpi; 4980 4981 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4987 /*@C 4988 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4989 matrices from each processor 4990 4991 Collective on MPI_Comm 4992 4993 Input Parameters: 4994 + comm - the communicators the parallel matrix will live on 4995 . seqmat - the input sequential matrices 4996 . m - number of local rows (or PETSC_DECIDE) 4997 . n - number of local columns (or PETSC_DECIDE) 4998 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 5000 Output Parameter: 5001 . mpimat - the parallel matrix generated 5002 5003 Level: advanced 5004 5005 Notes: 5006 The dimensions of the sequential matrix in each processor MUST be the same. 5007 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5008 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5009 @*/ 5010 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5011 { 5012 PetscErrorCode ierr; 5013 PetscMPIInt size; 5014 5015 PetscFunctionBegin; 5016 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5017 if (size == 1){ 5018 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5019 if (scall == MAT_INITIAL_MATRIX){ 5020 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5021 } else { 5022 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5023 } 5024 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5028 if (scall == MAT_INITIAL_MATRIX){ 5029 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5030 } 5031 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5032 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5033 PetscFunctionReturn(0); 5034 } 5035 5036 #undef __FUNCT__ 5037 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5038 /*@ 5039 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5040 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5041 with MatGetSize() 5042 5043 Not Collective 5044 5045 Input Parameters: 5046 + A - the matrix 5047 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5048 5049 Output Parameter: 5050 . A_loc - the local sequential matrix generated 5051 5052 Level: developer 5053 5054 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5055 5056 @*/ 5057 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5058 { 5059 PetscErrorCode ierr; 5060 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5061 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5062 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5063 MatScalar *aa=a->a,*ba=b->a,*cam; 5064 PetscScalar *ca; 5065 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5066 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5067 PetscBool match; 5068 5069 PetscFunctionBegin; 5070 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5071 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5072 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5073 if (scall == MAT_INITIAL_MATRIX){ 5074 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5075 ci[0] = 0; 5076 for (i=0; i<am; i++){ 5077 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5078 } 5079 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5080 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5081 k = 0; 5082 for (i=0; i<am; i++) { 5083 ncols_o = bi[i+1] - bi[i]; 5084 ncols_d = ai[i+1] - ai[i]; 5085 /* off-diagonal portion of A */ 5086 for (jo=0; jo<ncols_o; jo++) { 5087 col = cmap[*bj]; 5088 if (col >= cstart) break; 5089 cj[k] = col; bj++; 5090 ca[k++] = *ba++; 5091 } 5092 /* diagonal portion of A */ 5093 for (j=0; j<ncols_d; j++) { 5094 cj[k] = cstart + *aj++; 5095 ca[k++] = *aa++; 5096 } 5097 /* off-diagonal portion of A */ 5098 for (j=jo; j<ncols_o; j++) { 5099 cj[k] = cmap[*bj++]; 5100 ca[k++] = *ba++; 5101 } 5102 } 5103 /* put together the new matrix */ 5104 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5105 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5106 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5107 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5108 mat->free_a = PETSC_TRUE; 5109 mat->free_ij = PETSC_TRUE; 5110 mat->nonew = 0; 5111 } else if (scall == MAT_REUSE_MATRIX){ 5112 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5113 ci = mat->i; cj = mat->j; cam = mat->a; 5114 for (i=0; i<am; i++) { 5115 /* off-diagonal portion of A */ 5116 ncols_o = bi[i+1] - bi[i]; 5117 for (jo=0; jo<ncols_o; jo++) { 5118 col = cmap[*bj]; 5119 if (col >= cstart) break; 5120 *cam++ = *ba++; bj++; 5121 } 5122 /* diagonal portion of A */ 5123 ncols_d = ai[i+1] - ai[i]; 5124 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5125 /* off-diagonal portion of A */ 5126 for (j=jo; j<ncols_o; j++) { 5127 *cam++ = *ba++; bj++; 5128 } 5129 } 5130 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5131 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5132 PetscFunctionReturn(0); 5133 } 5134 5135 #undef __FUNCT__ 5136 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5137 /*@C 5138 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5139 5140 Not Collective 5141 5142 Input Parameters: 5143 + A - the matrix 5144 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5145 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5146 5147 Output Parameter: 5148 . A_loc - the local sequential matrix generated 5149 5150 Level: developer 5151 5152 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5153 5154 @*/ 5155 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5156 { 5157 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5158 PetscErrorCode ierr; 5159 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5160 IS isrowa,iscola; 5161 Mat *aloc; 5162 PetscBool match; 5163 5164 PetscFunctionBegin; 5165 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5166 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5167 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5168 if (!row){ 5169 start = A->rmap->rstart; end = A->rmap->rend; 5170 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5171 } else { 5172 isrowa = *row; 5173 } 5174 if (!col){ 5175 start = A->cmap->rstart; 5176 cmap = a->garray; 5177 nzA = a->A->cmap->n; 5178 nzB = a->B->cmap->n; 5179 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5180 ncols = 0; 5181 for (i=0; i<nzB; i++) { 5182 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5183 else break; 5184 } 5185 imark = i; 5186 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5187 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5188 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5189 } else { 5190 iscola = *col; 5191 } 5192 if (scall != MAT_INITIAL_MATRIX){ 5193 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5194 aloc[0] = *A_loc; 5195 } 5196 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5197 *A_loc = aloc[0]; 5198 ierr = PetscFree(aloc);CHKERRQ(ierr); 5199 if (!row){ 5200 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5201 } 5202 if (!col){ 5203 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5204 } 5205 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5206 PetscFunctionReturn(0); 5207 } 5208 5209 #undef __FUNCT__ 5210 #define __FUNCT__ "MatGetBrowsOfAcols" 5211 /*@C 5212 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5213 5214 Collective on Mat 5215 5216 Input Parameters: 5217 + A,B - the matrices in mpiaij format 5218 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5219 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5220 5221 Output Parameter: 5222 + rowb, colb - index sets of rows and columns of B to extract 5223 - B_seq - the sequential matrix generated 5224 5225 Level: developer 5226 5227 @*/ 5228 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5229 { 5230 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5231 PetscErrorCode ierr; 5232 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5233 IS isrowb,iscolb; 5234 Mat *bseq=PETSC_NULL; 5235 5236 PetscFunctionBegin; 5237 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5238 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5239 } 5240 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5241 5242 if (scall == MAT_INITIAL_MATRIX){ 5243 start = A->cmap->rstart; 5244 cmap = a->garray; 5245 nzA = a->A->cmap->n; 5246 nzB = a->B->cmap->n; 5247 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5248 ncols = 0; 5249 for (i=0; i<nzB; i++) { /* row < local row index */ 5250 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5251 else break; 5252 } 5253 imark = i; 5254 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5255 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5256 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5257 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5258 } else { 5259 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5260 isrowb = *rowb; iscolb = *colb; 5261 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5262 bseq[0] = *B_seq; 5263 } 5264 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5265 *B_seq = bseq[0]; 5266 ierr = PetscFree(bseq);CHKERRQ(ierr); 5267 if (!rowb){ 5268 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5269 } else { 5270 *rowb = isrowb; 5271 } 5272 if (!colb){ 5273 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5274 } else { 5275 *colb = iscolb; 5276 } 5277 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5278 PetscFunctionReturn(0); 5279 } 5280 5281 #undef __FUNCT__ 5282 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5283 /* 5284 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5285 of the OFF-DIAGONAL portion of local A 5286 5287 Collective on Mat 5288 5289 Input Parameters: 5290 + A,B - the matrices in mpiaij format 5291 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5292 5293 Output Parameter: 5294 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5295 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5296 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5297 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5298 5299 Level: developer 5300 5301 */ 5302 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5303 { 5304 VecScatter_MPI_General *gen_to,*gen_from; 5305 PetscErrorCode ierr; 5306 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5307 Mat_SeqAIJ *b_oth; 5308 VecScatter ctx=a->Mvctx; 5309 MPI_Comm comm=((PetscObject)ctx)->comm; 5310 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5311 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5312 PetscScalar *rvalues,*svalues; 5313 MatScalar *b_otha,*bufa,*bufA; 5314 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5315 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5316 MPI_Status *sstatus,rstatus; 5317 PetscMPIInt jj; 5318 PetscInt *cols,sbs,rbs; 5319 PetscScalar *vals; 5320 5321 PetscFunctionBegin; 5322 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5323 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5324 } 5325 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5326 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5327 5328 gen_to = (VecScatter_MPI_General*)ctx->todata; 5329 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5330 rvalues = gen_from->values; /* holds the length of receiving row */ 5331 svalues = gen_to->values; /* holds the length of sending row */ 5332 nrecvs = gen_from->n; 5333 nsends = gen_to->n; 5334 5335 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5336 srow = gen_to->indices; /* local row index to be sent */ 5337 sstarts = gen_to->starts; 5338 sprocs = gen_to->procs; 5339 sstatus = gen_to->sstatus; 5340 sbs = gen_to->bs; 5341 rstarts = gen_from->starts; 5342 rprocs = gen_from->procs; 5343 rbs = gen_from->bs; 5344 5345 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5346 if (scall == MAT_INITIAL_MATRIX){ 5347 /* i-array */ 5348 /*---------*/ 5349 /* post receives */ 5350 for (i=0; i<nrecvs; i++){ 5351 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5352 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5353 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5354 } 5355 5356 /* pack the outgoing message */ 5357 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5358 sstartsj[0] = 0; rstartsj[0] = 0; 5359 len = 0; /* total length of j or a array to be sent */ 5360 k = 0; 5361 for (i=0; i<nsends; i++){ 5362 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5363 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5364 for (j=0; j<nrows; j++) { 5365 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5366 for (l=0; l<sbs; l++){ 5367 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5368 rowlen[j*sbs+l] = ncols; 5369 len += ncols; 5370 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5371 } 5372 k++; 5373 } 5374 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5375 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5376 } 5377 /* recvs and sends of i-array are completed */ 5378 i = nrecvs; 5379 while (i--) { 5380 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5381 } 5382 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5383 5384 /* allocate buffers for sending j and a arrays */ 5385 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5386 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5387 5388 /* create i-array of B_oth */ 5389 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5390 b_othi[0] = 0; 5391 len = 0; /* total length of j or a array to be received */ 5392 k = 0; 5393 for (i=0; i<nrecvs; i++){ 5394 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5395 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5396 for (j=0; j<nrows; j++) { 5397 b_othi[k+1] = b_othi[k] + rowlen[j]; 5398 len += rowlen[j]; k++; 5399 } 5400 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5401 } 5402 5403 /* allocate space for j and a arrrays of B_oth */ 5404 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5405 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5406 5407 /* j-array */ 5408 /*---------*/ 5409 /* post receives of j-array */ 5410 for (i=0; i<nrecvs; i++){ 5411 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5412 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5413 } 5414 5415 /* pack the outgoing message j-array */ 5416 k = 0; 5417 for (i=0; i<nsends; i++){ 5418 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5419 bufJ = bufj+sstartsj[i]; 5420 for (j=0; j<nrows; j++) { 5421 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5422 for (ll=0; ll<sbs; ll++){ 5423 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5424 for (l=0; l<ncols; l++){ 5425 *bufJ++ = cols[l]; 5426 } 5427 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5428 } 5429 } 5430 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5431 } 5432 5433 /* recvs and sends of j-array are completed */ 5434 i = nrecvs; 5435 while (i--) { 5436 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5437 } 5438 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5439 } else if (scall == MAT_REUSE_MATRIX){ 5440 sstartsj = *startsj_s; 5441 rstartsj = *startsj_r; 5442 bufa = *bufa_ptr; 5443 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5444 b_otha = b_oth->a; 5445 } else { 5446 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5447 } 5448 5449 /* a-array */ 5450 /*---------*/ 5451 /* post receives of a-array */ 5452 for (i=0; i<nrecvs; i++){ 5453 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5454 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5455 } 5456 5457 /* pack the outgoing message a-array */ 5458 k = 0; 5459 for (i=0; i<nsends; i++){ 5460 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5461 bufA = bufa+sstartsj[i]; 5462 for (j=0; j<nrows; j++) { 5463 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5464 for (ll=0; ll<sbs; ll++){ 5465 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5466 for (l=0; l<ncols; l++){ 5467 *bufA++ = vals[l]; 5468 } 5469 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5470 } 5471 } 5472 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5473 } 5474 /* recvs and sends of a-array are completed */ 5475 i = nrecvs; 5476 while (i--) { 5477 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5478 } 5479 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5480 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5481 5482 if (scall == MAT_INITIAL_MATRIX){ 5483 /* put together the new matrix */ 5484 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5485 5486 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5487 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5488 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5489 b_oth->free_a = PETSC_TRUE; 5490 b_oth->free_ij = PETSC_TRUE; 5491 b_oth->nonew = 0; 5492 5493 ierr = PetscFree(bufj);CHKERRQ(ierr); 5494 if (!startsj_s || !bufa_ptr){ 5495 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5496 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5497 } else { 5498 *startsj_s = sstartsj; 5499 *startsj_r = rstartsj; 5500 *bufa_ptr = bufa; 5501 } 5502 } 5503 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5504 PetscFunctionReturn(0); 5505 } 5506 5507 #undef __FUNCT__ 5508 #define __FUNCT__ "MatGetCommunicationStructs" 5509 /*@C 5510 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5511 5512 Not Collective 5513 5514 Input Parameters: 5515 . A - The matrix in mpiaij format 5516 5517 Output Parameter: 5518 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5519 . colmap - A map from global column index to local index into lvec 5520 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5521 5522 Level: developer 5523 5524 @*/ 5525 #if defined (PETSC_USE_CTABLE) 5526 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5527 #else 5528 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5529 #endif 5530 { 5531 Mat_MPIAIJ *a; 5532 5533 PetscFunctionBegin; 5534 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5535 PetscValidPointer(lvec, 2); 5536 PetscValidPointer(colmap, 3); 5537 PetscValidPointer(multScatter, 4); 5538 a = (Mat_MPIAIJ *) A->data; 5539 if (lvec) *lvec = a->lvec; 5540 if (colmap) *colmap = a->colmap; 5541 if (multScatter) *multScatter = a->Mvctx; 5542 PetscFunctionReturn(0); 5543 } 5544 5545 EXTERN_C_BEGIN 5546 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5547 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5548 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5549 EXTERN_C_END 5550 5551 #undef __FUNCT__ 5552 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5553 /* 5554 Computes (B'*A')' since computing B*A directly is untenable 5555 5556 n p p 5557 ( ) ( ) ( ) 5558 m ( A ) * n ( B ) = m ( C ) 5559 ( ) ( ) ( ) 5560 5561 */ 5562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5563 { 5564 PetscErrorCode ierr; 5565 Mat At,Bt,Ct; 5566 5567 PetscFunctionBegin; 5568 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5569 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5570 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5571 ierr = MatDestroy(&At);CHKERRQ(ierr); 5572 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5573 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5574 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5575 PetscFunctionReturn(0); 5576 } 5577 5578 #undef __FUNCT__ 5579 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5580 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5581 { 5582 PetscErrorCode ierr; 5583 PetscInt m=A->rmap->n,n=B->cmap->n; 5584 Mat Cmat; 5585 5586 PetscFunctionBegin; 5587 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5588 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5589 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5590 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5591 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5592 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5593 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5594 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5595 5596 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5597 *C = Cmat; 5598 PetscFunctionReturn(0); 5599 } 5600 5601 /* ----------------------------------------------------------------*/ 5602 #undef __FUNCT__ 5603 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5604 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5605 { 5606 PetscErrorCode ierr; 5607 5608 PetscFunctionBegin; 5609 if (scall == MAT_INITIAL_MATRIX){ 5610 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5611 } 5612 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5613 PetscFunctionReturn(0); 5614 } 5615 5616 EXTERN_C_BEGIN 5617 #if defined(PETSC_HAVE_MUMPS) 5618 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5619 #endif 5620 #if defined(PETSC_HAVE_PASTIX) 5621 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5622 #endif 5623 #if defined(PETSC_HAVE_SUPERLU_DIST) 5624 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5625 #endif 5626 #if defined(PETSC_HAVE_CLIQUE) 5627 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5628 #endif 5629 EXTERN_C_END 5630 5631 /*MC 5632 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5633 5634 Options Database Keys: 5635 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5636 5637 Level: beginner 5638 5639 .seealso: MatCreateAIJ() 5640 M*/ 5641 5642 EXTERN_C_BEGIN 5643 #undef __FUNCT__ 5644 #define __FUNCT__ "MatCreate_MPIAIJ" 5645 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5646 { 5647 Mat_MPIAIJ *b; 5648 PetscErrorCode ierr; 5649 PetscMPIInt size; 5650 5651 PetscFunctionBegin; 5652 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5653 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5654 B->data = (void*)b; 5655 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5656 B->assembled = PETSC_FALSE; 5657 B->insertmode = NOT_SET_VALUES; 5658 b->size = size; 5659 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5660 5661 /* build cache for off array entries formed */ 5662 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5663 b->donotstash = PETSC_FALSE; 5664 b->colmap = 0; 5665 b->garray = 0; 5666 b->roworiented = PETSC_TRUE; 5667 5668 /* stuff used for matrix vector multiply */ 5669 b->lvec = PETSC_NULL; 5670 b->Mvctx = PETSC_NULL; 5671 5672 /* stuff for MatGetRow() */ 5673 b->rowindices = 0; 5674 b->rowvalues = 0; 5675 b->getrowactive = PETSC_FALSE; 5676 5677 /* flexible pointer used in CUSP/CUSPARSE classes */ 5678 b->spptr = PETSC_NULL; 5679 5680 #if defined(PETSC_HAVE_MUMPS) 5681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5682 "MatGetFactor_aij_mumps", 5683 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5684 #endif 5685 #if defined(PETSC_HAVE_PASTIX) 5686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5687 "MatGetFactor_mpiaij_pastix", 5688 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5689 #endif 5690 #if defined(PETSC_HAVE_SUPERLU_DIST) 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5692 "MatGetFactor_mpiaij_superlu_dist", 5693 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5694 #endif 5695 #if defined(PETSC_HAVE_CLIQUE) 5696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5697 "MatGetFactor_aij_clique", 5698 MatGetFactor_aij_clique);CHKERRQ(ierr); 5699 #endif 5700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5701 "MatStoreValues_MPIAIJ", 5702 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5703 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5704 "MatRetrieveValues_MPIAIJ", 5705 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5706 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5707 "MatGetDiagonalBlock_MPIAIJ", 5708 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5709 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5710 "MatIsTranspose_MPIAIJ", 5711 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5712 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5713 "MatMPIAIJSetPreallocation_MPIAIJ", 5714 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5715 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5716 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5717 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5718 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5719 "MatDiagonalScaleLocal_MPIAIJ", 5720 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5721 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5722 "MatConvert_MPIAIJ_MPIAIJPERM", 5723 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5724 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5725 "MatConvert_MPIAIJ_MPIAIJCRL", 5726 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5727 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5728 "MatConvert_MPIAIJ_MPISBAIJ", 5729 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5730 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5731 "MatMatMult_MPIDense_MPIAIJ", 5732 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5733 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5734 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5735 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5736 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5737 "MatMatMultNumeric_MPIDense_MPIAIJ", 5738 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5739 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5740 PetscFunctionReturn(0); 5741 } 5742 EXTERN_C_END 5743 5744 #undef __FUNCT__ 5745 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5746 /*@ 5747 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5748 and "off-diagonal" part of the matrix in CSR format. 5749 5750 Collective on MPI_Comm 5751 5752 Input Parameters: 5753 + comm - MPI communicator 5754 . m - number of local rows (Cannot be PETSC_DECIDE) 5755 . n - This value should be the same as the local size used in creating the 5756 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5757 calculated if N is given) For square matrices n is almost always m. 5758 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5759 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5760 . i - row indices for "diagonal" portion of matrix 5761 . j - column indices 5762 . a - matrix values 5763 . oi - row indices for "off-diagonal" portion of matrix 5764 . oj - column indices 5765 - oa - matrix values 5766 5767 Output Parameter: 5768 . mat - the matrix 5769 5770 Level: advanced 5771 5772 Notes: 5773 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5774 must free the arrays once the matrix has been destroyed and not before. 5775 5776 The i and j indices are 0 based 5777 5778 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5779 5780 This sets local rows and cannot be used to set off-processor values. 5781 5782 You cannot later use MatSetValues() to change values in this matrix. 5783 5784 .keywords: matrix, aij, compressed row, sparse, parallel 5785 5786 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5787 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5788 @*/ 5789 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5790 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5791 { 5792 PetscErrorCode ierr; 5793 Mat_MPIAIJ *maij; 5794 5795 PetscFunctionBegin; 5796 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5797 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5798 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5799 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5800 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5801 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5802 maij = (Mat_MPIAIJ*) (*mat)->data; 5803 maij->donotstash = PETSC_TRUE; 5804 (*mat)->preallocated = PETSC_TRUE; 5805 5806 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5807 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5808 5809 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5810 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5811 5812 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5813 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5814 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5815 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5816 5817 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5818 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5819 PetscFunctionReturn(0); 5820 } 5821 5822 /* 5823 Special version for direct calls from Fortran 5824 */ 5825 #include <petsc-private/fortranimpl.h> 5826 5827 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5828 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5829 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5830 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5831 #endif 5832 5833 /* Change these macros so can be used in void function */ 5834 #undef CHKERRQ 5835 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5836 #undef SETERRQ2 5837 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5838 #undef SETERRQ3 5839 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5840 #undef SETERRQ 5841 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5842 5843 EXTERN_C_BEGIN 5844 #undef __FUNCT__ 5845 #define __FUNCT__ "matsetvaluesmpiaij_" 5846 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5847 { 5848 Mat mat = *mmat; 5849 PetscInt m = *mm, n = *mn; 5850 InsertMode addv = *maddv; 5851 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5852 PetscScalar value; 5853 PetscErrorCode ierr; 5854 5855 MatCheckPreallocated(mat,1); 5856 if (mat->insertmode == NOT_SET_VALUES) { 5857 mat->insertmode = addv; 5858 } 5859 #if defined(PETSC_USE_DEBUG) 5860 else if (mat->insertmode != addv) { 5861 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5862 } 5863 #endif 5864 { 5865 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5866 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5867 PetscBool roworiented = aij->roworiented; 5868 5869 /* Some Variables required in the macro */ 5870 Mat A = aij->A; 5871 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5872 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5873 MatScalar *aa = a->a; 5874 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5875 Mat B = aij->B; 5876 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5877 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5878 MatScalar *ba = b->a; 5879 5880 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5881 PetscInt nonew = a->nonew; 5882 MatScalar *ap1,*ap2; 5883 5884 PetscFunctionBegin; 5885 for (i=0; i<m; i++) { 5886 if (im[i] < 0) continue; 5887 #if defined(PETSC_USE_DEBUG) 5888 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5889 #endif 5890 if (im[i] >= rstart && im[i] < rend) { 5891 row = im[i] - rstart; 5892 lastcol1 = -1; 5893 rp1 = aj + ai[row]; 5894 ap1 = aa + ai[row]; 5895 rmax1 = aimax[row]; 5896 nrow1 = ailen[row]; 5897 low1 = 0; 5898 high1 = nrow1; 5899 lastcol2 = -1; 5900 rp2 = bj + bi[row]; 5901 ap2 = ba + bi[row]; 5902 rmax2 = bimax[row]; 5903 nrow2 = bilen[row]; 5904 low2 = 0; 5905 high2 = nrow2; 5906 5907 for (j=0; j<n; j++) { 5908 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5909 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5910 if (in[j] >= cstart && in[j] < cend){ 5911 col = in[j] - cstart; 5912 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5913 } else if (in[j] < 0) continue; 5914 #if defined(PETSC_USE_DEBUG) 5915 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5916 #endif 5917 else { 5918 if (mat->was_assembled) { 5919 if (!aij->colmap) { 5920 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5921 } 5922 #if defined (PETSC_USE_CTABLE) 5923 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5924 col--; 5925 #else 5926 col = aij->colmap[in[j]] - 1; 5927 #endif 5928 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5929 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5930 col = in[j]; 5931 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5932 B = aij->B; 5933 b = (Mat_SeqAIJ*)B->data; 5934 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5935 rp2 = bj + bi[row]; 5936 ap2 = ba + bi[row]; 5937 rmax2 = bimax[row]; 5938 nrow2 = bilen[row]; 5939 low2 = 0; 5940 high2 = nrow2; 5941 bm = aij->B->rmap->n; 5942 ba = b->a; 5943 } 5944 } else col = in[j]; 5945 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5946 } 5947 } 5948 } else { 5949 if (!aij->donotstash) { 5950 if (roworiented) { 5951 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5952 } else { 5953 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5954 } 5955 } 5956 } 5957 }} 5958 PetscFunctionReturnVoid(); 5959 } 5960 EXTERN_C_END 5961 5962