1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 if (redund->matseq) { /* via MatGetSubMatrices() */ 2530 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2531 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2532 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2534 } else { 2535 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2537 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2538 for (i=0; i<redund->nrecvs; i++) { 2539 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2541 } 2542 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2543 } 2544 2545 if (redund->psubcomm) { 2546 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2547 } 2548 ierr = redund->Destroy(A);CHKERRQ(ierr); 2549 ierr = PetscFree(redund);CHKERRQ(ierr); 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2557 { 2558 PetscMPIInt rank,size; 2559 MPI_Comm comm,subcomm=psubcomm->comm; 2560 PetscErrorCode ierr; 2561 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2562 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2563 PetscInt *rowrange = mat->rmap->range; 2564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2565 Mat A = aij->A,B=aij->B,C=*matredundant; 2566 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2567 PetscScalar *sbuf_a; 2568 PetscInt nzlocal=a->nz+b->nz; 2569 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2570 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2571 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2572 MatScalar *aworkA,*aworkB; 2573 PetscScalar *vals; 2574 PetscMPIInt tag1,tag2,tag3,imdex; 2575 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2576 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2577 MPI_Status recv_status,*send_status; 2578 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2579 PetscInt **rbuf_j=NULL; 2580 PetscScalar **rbuf_a=NULL; 2581 Mat_Redundant *redund =NULL; 2582 PetscBool flg=PETSC_FALSE; 2583 2584 PetscFunctionBegin; 2585 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2587 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2588 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2589 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2590 2591 /* ---------- new imples: use MatGetSubMatrices() ------------*/ 2592 ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr); 2593 if (flg) { 2594 Mat *matseq; 2595 IS isrow,iscol; 2596 PetscInt mloc_sub,rstart,rend; 2597 2598 if (reuse == MAT_INITIAL_MATRIX) { 2599 /* create a local sequential matrix matseq[0] */ 2600 mloc_sub = PETSC_DECIDE; 2601 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2602 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2603 rstart = rend - mloc_sub; 2604 /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */ 2605 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2606 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2607 } else { /* reuse == MAT_REUSE_MATRIX */ 2608 if (subsize == 1) { 2609 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2610 redund = c->redundant; 2611 } else { 2612 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2613 redund = c->redundant; 2614 } 2615 2616 isrow = redund->isrow; 2617 iscol = redund->iscol; 2618 matseq = redund->matseq; 2619 } 2620 2621 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2622 /* 2623 if (rank==0) { 2624 printf("[%d] matsub:\n",rank); 2625 ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2626 } 2627 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 2628 */ 2629 2630 /* Create matredundant by concatenating matseq[0] from processors in this subcomm */ 2631 /* 2632 if (reuse == MAT_REUSE_MATRIX) { 2633 if (!rank) printf("matredundant:\n"); 2634 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2635 } 2636 */ 2637 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2638 /* 2639 if (nsubcomm == 1) { 2640 if (!rank) printf( "matredundant\n"); 2641 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2642 } */ 2643 2644 if (reuse == MAT_INITIAL_MATRIX) { 2645 /* create a supporting struct and attach it to C for reuse */ 2646 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2647 if (subsize == 1) { 2648 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2649 c->redundant = redund; 2650 } else { 2651 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2652 c->redundant = redund; 2653 } 2654 2655 redund->isrow = isrow; 2656 redund->iscol = iscol; 2657 redund->matseq = matseq; 2658 redund->psubcomm = NULL; 2659 2660 redund->Destroy = (*matredundant)->ops->destroy; 2661 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2662 } 2663 PetscFunctionReturn(0); 2664 } 2665 /* ----------------------------------------------------*/ 2666 2667 if (reuse == MAT_REUSE_MATRIX) { 2668 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2669 if (subsize == 1) { 2670 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2671 redund = c->redundant; 2672 } else { 2673 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2674 redund = c->redundant; 2675 } 2676 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2677 2678 nsends = redund->nsends; 2679 nrecvs = redund->nrecvs; 2680 send_rank = redund->send_rank; 2681 recv_rank = redund->recv_rank; 2682 sbuf_nz = redund->sbuf_nz; 2683 rbuf_nz = redund->rbuf_nz; 2684 sbuf_j = redund->sbuf_j; 2685 sbuf_a = redund->sbuf_a; 2686 rbuf_j = redund->rbuf_j; 2687 rbuf_a = redund->rbuf_a; 2688 } 2689 2690 if (reuse == MAT_INITIAL_MATRIX) { 2691 PetscInt nleftover,np_subcomm; 2692 2693 /* get the destination processors' id send_rank, nsends and nrecvs */ 2694 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2695 2696 np_subcomm = size/nsubcomm; 2697 nleftover = size - nsubcomm*np_subcomm; 2698 2699 nsends = 0; nrecvs = 0; 2700 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2701 /* -------------------------------------------*/ 2702 for (i=0; i<size; i++) { 2703 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2704 send_rank[nsends] = i; nsends++; 2705 recv_rank[nrecvs++] = i; 2706 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2707 } 2708 } 2709 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2710 i = size-nleftover-1; 2711 j = 0; 2712 while (j < nsubcomm - nleftover) { 2713 send_rank[nsends++] = i; 2714 i--; j++; 2715 /* printf("[%d] send to [%d]\n",rank,i); */ 2716 } 2717 } 2718 2719 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2720 for (i=0; i<nleftover; i++) { 2721 recv_rank[nrecvs++] = size-nleftover+i; 2722 /* printf("[%d] recv from [%d]\n",rank,i); */ 2723 } 2724 } 2725 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2726 /* --------------------------------------------------*/ 2727 PetscInt color,subcommstart; 2728 subcommstart=0; 2729 for (color=0; color<nsubcomm; color++) { 2730 if (psubcomm->color != color) { 2731 for (i=0; i<psubcomm->subsize[color]; i++) { 2732 if (subrank == i) { /* my_subrank == other's subrank */ 2733 send_rank[nsends++] = subcommstart+i; 2734 recv_rank[nrecvs++] = subcommstart+i; 2735 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2736 } 2737 } 2738 } 2739 subcommstart += psubcomm->subsize[color]; 2740 } 2741 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2742 subcommstart=0; 2743 for (color=0; color<nsubcomm; color++) { 2744 subcommstart += psubcomm->subsize[color]; 2745 if (psubcomm->color == color) continue; 2746 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2747 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2748 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2749 } 2750 } 2751 } 2752 2753 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2754 subcommstart=0; 2755 for (color=0; color<nsubcomm; color++) { 2756 subcommstart += psubcomm->subsize[color]; 2757 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2758 recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */ 2759 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2760 } 2761 } 2762 } 2763 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type); 2764 2765 /* allocate sbuf_j, sbuf_a */ 2766 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2767 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2768 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2769 /* 2770 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2771 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2772 */ 2773 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2774 2775 /* copy mat's local entries into the buffers */ 2776 if (reuse == MAT_INITIAL_MATRIX) { 2777 rownz_max = 0; 2778 rptr = sbuf_j; 2779 cols = sbuf_j + rend-rstart + 1; 2780 vals = sbuf_a; 2781 rptr[0] = 0; 2782 for (i=0; i<rend-rstart; i++) { 2783 row = i + rstart; 2784 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2785 ncols = nzA + nzB; 2786 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2787 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2788 /* load the column indices for this row into cols */ 2789 lwrite = 0; 2790 for (l=0; l<nzB; l++) { 2791 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2792 vals[lwrite] = aworkB[l]; 2793 cols[lwrite++] = ctmp; 2794 } 2795 } 2796 for (l=0; l<nzA; l++) { 2797 vals[lwrite] = aworkA[l]; 2798 cols[lwrite++] = cstart + cworkA[l]; 2799 } 2800 for (l=0; l<nzB; l++) { 2801 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2802 vals[lwrite] = aworkB[l]; 2803 cols[lwrite++] = ctmp; 2804 } 2805 } 2806 vals += ncols; 2807 cols += ncols; 2808 rptr[i+1] = rptr[i] + ncols; 2809 if (rownz_max < ncols) rownz_max = ncols; 2810 } 2811 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2812 } else { /* only copy matrix values into sbuf_a */ 2813 rptr = sbuf_j; 2814 vals = sbuf_a; 2815 rptr[0] = 0; 2816 for (i=0; i<rend-rstart; i++) { 2817 row = i + rstart; 2818 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2819 ncols = nzA + nzB; 2820 cworkB = b->j + b->i[i]; 2821 aworkA = a->a + a->i[i]; 2822 aworkB = b->a + b->i[i]; 2823 lwrite = 0; 2824 for (l=0; l<nzB; l++) { 2825 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2826 } 2827 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2828 for (l=0; l<nzB; l++) { 2829 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2830 } 2831 vals += ncols; 2832 rptr[i+1] = rptr[i] + ncols; 2833 } 2834 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2835 2836 /* send nzlocal to others, and recv other's nzlocal */ 2837 /*--------------------------------------------------*/ 2838 if (reuse == MAT_INITIAL_MATRIX) { 2839 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2840 2841 s_waits2 = s_waits3 + nsends; 2842 s_waits1 = s_waits2 + nsends; 2843 r_waits1 = s_waits1 + nsends; 2844 r_waits2 = r_waits1 + nrecvs; 2845 r_waits3 = r_waits2 + nrecvs; 2846 } else { 2847 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2848 2849 r_waits3 = s_waits3 + nsends; 2850 } 2851 2852 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2853 if (reuse == MAT_INITIAL_MATRIX) { 2854 /* get new tags to keep the communication clean */ 2855 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2856 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2857 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2858 2859 /* post receives of other's nzlocal */ 2860 for (i=0; i<nrecvs; i++) { 2861 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2862 } 2863 /* send nzlocal to others */ 2864 for (i=0; i<nsends; i++) { 2865 sbuf_nz[i] = nzlocal; 2866 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2867 } 2868 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2869 count = nrecvs; 2870 while (count) { 2871 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2872 2873 recv_rank[imdex] = recv_status.MPI_SOURCE; 2874 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2875 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2876 2877 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2878 2879 rbuf_nz[imdex] += i + 2; 2880 2881 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2882 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2883 count--; 2884 } 2885 /* wait on sends of nzlocal */ 2886 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2887 /* send mat->i,j to others, and recv from other's */ 2888 /*------------------------------------------------*/ 2889 for (i=0; i<nsends; i++) { 2890 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2891 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2892 } 2893 /* wait on receives of mat->i,j */ 2894 /*------------------------------*/ 2895 count = nrecvs; 2896 while (count) { 2897 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2898 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2899 count--; 2900 } 2901 /* wait on sends of mat->i,j */ 2902 /*---------------------------*/ 2903 if (nsends) { 2904 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2905 } 2906 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2907 2908 /* post receives, send and receive mat->a */ 2909 /*----------------------------------------*/ 2910 for (imdex=0; imdex<nrecvs; imdex++) { 2911 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2912 } 2913 for (i=0; i<nsends; i++) { 2914 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2915 } 2916 count = nrecvs; 2917 while (count) { 2918 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2919 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2920 count--; 2921 } 2922 if (nsends) { 2923 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2924 } 2925 2926 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2927 2928 /* create redundant matrix */ 2929 /*-------------------------*/ 2930 if (reuse == MAT_INITIAL_MATRIX) { 2931 const PetscInt *range; 2932 PetscInt rstart_sub,rend_sub,mloc_sub; 2933 2934 /* compute rownz_max for preallocation */ 2935 for (imdex=0; imdex<nrecvs; imdex++) { 2936 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2937 rptr = rbuf_j[imdex]; 2938 for (i=0; i<j; i++) { 2939 ncols = rptr[i+1] - rptr[i]; 2940 if (rownz_max < ncols) rownz_max = ncols; 2941 } 2942 } 2943 2944 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2945 2946 /* get local size of redundant matrix 2947 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2948 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2949 rstart_sub = range[nsubcomm*subrank]; 2950 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2951 rend_sub = range[nsubcomm*(subrank+1)]; 2952 } else { 2953 rend_sub = mat->rmap->N; 2954 } 2955 mloc_sub = rend_sub - rstart_sub; 2956 2957 if (M == N) { 2958 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2959 } else { /* non-square matrix */ 2960 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2961 } 2962 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2963 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2964 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2965 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2966 } else { 2967 C = *matredundant; 2968 } 2969 2970 /* insert local matrix entries */ 2971 rptr = sbuf_j; 2972 cols = sbuf_j + rend-rstart + 1; 2973 vals = sbuf_a; 2974 for (i=0; i<rend-rstart; i++) { 2975 row = i + rstart; 2976 ncols = rptr[i+1] - rptr[i]; 2977 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2978 vals += ncols; 2979 cols += ncols; 2980 } 2981 /* insert received matrix entries */ 2982 for (imdex=0; imdex<nrecvs; imdex++) { 2983 rstart = rowrange[recv_rank[imdex]]; 2984 rend = rowrange[recv_rank[imdex]+1]; 2985 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2986 rptr = rbuf_j[imdex]; 2987 cols = rbuf_j[imdex] + rend-rstart + 1; 2988 vals = rbuf_a[imdex]; 2989 for (i=0; i<rend-rstart; i++) { 2990 row = i + rstart; 2991 ncols = rptr[i+1] - rptr[i]; 2992 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2993 vals += ncols; 2994 cols += ncols; 2995 } 2996 } 2997 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 3000 if (reuse == MAT_INITIAL_MATRIX) { 3001 *matredundant = C; 3002 3003 /* create a supporting struct and attach it to C for reuse */ 3004 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 3005 if (subsize == 1) { 3006 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 3007 c->redundant = redund; 3008 } else { 3009 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 3010 c->redundant = redund; 3011 } 3012 3013 redund->nzlocal = nzlocal; 3014 redund->nsends = nsends; 3015 redund->nrecvs = nrecvs; 3016 redund->send_rank = send_rank; 3017 redund->recv_rank = recv_rank; 3018 redund->sbuf_nz = sbuf_nz; 3019 redund->rbuf_nz = rbuf_nz; 3020 redund->sbuf_j = sbuf_j; 3021 redund->sbuf_a = sbuf_a; 3022 redund->rbuf_j = rbuf_j; 3023 redund->rbuf_a = rbuf_a; 3024 redund->psubcomm = NULL; 3025 3026 redund->Destroy = C->ops->destroy; 3027 C->ops->destroy = MatDestroy_MatRedundant; 3028 } 3029 PetscFunctionReturn(0); 3030 } 3031 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 3034 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 3035 { 3036 PetscErrorCode ierr; 3037 MPI_Comm comm; 3038 PetscMPIInt size,subsize; 3039 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 3040 Mat_Redundant *redund =NULL; 3041 3042 PetscFunctionBegin; 3043 if (subcomm == MPI_COMM_NULL) { /* use psubcomm */ 3044 if (reuse == MAT_INITIAL_MATRIX) { 3045 if (psubcomm == NULL) { /* create psubcomm */ 3046 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3047 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3048 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 3049 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 3050 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 3051 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 3052 } 3053 subcomm = psubcomm->comm; 3054 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 3055 } else { /* retrieve psubcomm */ 3056 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 3057 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 3058 if (subsize == 1) { 3059 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3060 psubcomm = c->redundant->psubcomm; 3061 } else { 3062 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3063 psubcomm = c->redundant->psubcomm; 3064 } 3065 } 3066 } 3067 3068 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 3069 ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 3070 } else { 3071 /* via MatGetSubMatrices() */ 3072 Mat *matseq; 3073 IS isrow,iscol; 3074 3075 if (reuse == MAT_INITIAL_MATRIX) { 3076 /* create a local sequential matrix matseq[0] */ 3077 mloc_sub = PETSC_DECIDE; 3078 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 3079 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 3080 rstart = rend - mloc_sub; 3081 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 3082 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 3083 } else { /* reuse == MAT_REUSE_MATRIX */ 3084 if (subsize == 1) { 3085 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3086 redund = c->redundant; 3087 } else { 3088 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3089 redund = c->redundant; 3090 } 3091 3092 isrow = redund->isrow; 3093 iscol = redund->iscol; 3094 matseq = redund->matseq; 3095 } 3096 3097 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 3098 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 3099 3100 if (reuse == MAT_INITIAL_MATRIX) { 3101 /* create a supporting struct and attach it to C for reuse */ 3102 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 3103 if (subsize == 1) { 3104 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3105 c->redundant = redund; 3106 } else { 3107 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3108 c->redundant = redund; 3109 } 3110 redund->isrow = isrow; 3111 redund->iscol = iscol; 3112 redund->matseq = matseq; 3113 redund->psubcomm = NULL; 3114 redund->Destroy = (*matredundant)->ops->destroy; 3115 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 3116 } 3117 } 3118 3119 if (psubcomm) { /* free psubcomm in MatDestroy_MatRedundant() */ 3120 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 3121 if (subsize == 1) { 3122 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3123 c->redundant->psubcomm = psubcomm; 3124 } else { 3125 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3126 c->redundant->psubcomm = psubcomm ; 3127 } 3128 } 3129 PetscFunctionReturn(0); 3130 } 3131 3132 #undef __FUNCT__ 3133 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3134 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3135 { 3136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3137 PetscErrorCode ierr; 3138 PetscInt i,*idxb = 0; 3139 PetscScalar *va,*vb; 3140 Vec vtmp; 3141 3142 PetscFunctionBegin; 3143 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3144 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3145 if (idx) { 3146 for (i=0; i<A->rmap->n; i++) { 3147 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3148 } 3149 } 3150 3151 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3152 if (idx) { 3153 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3154 } 3155 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3156 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3157 3158 for (i=0; i<A->rmap->n; i++) { 3159 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3160 va[i] = vb[i]; 3161 if (idx) idx[i] = a->garray[idxb[i]]; 3162 } 3163 } 3164 3165 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3166 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3167 ierr = PetscFree(idxb);CHKERRQ(ierr); 3168 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3169 PetscFunctionReturn(0); 3170 } 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3174 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3175 { 3176 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3177 PetscErrorCode ierr; 3178 PetscInt i,*idxb = 0; 3179 PetscScalar *va,*vb; 3180 Vec vtmp; 3181 3182 PetscFunctionBegin; 3183 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3184 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3185 if (idx) { 3186 for (i=0; i<A->cmap->n; i++) { 3187 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3188 } 3189 } 3190 3191 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3192 if (idx) { 3193 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3194 } 3195 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3196 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3197 3198 for (i=0; i<A->rmap->n; i++) { 3199 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3200 va[i] = vb[i]; 3201 if (idx) idx[i] = a->garray[idxb[i]]; 3202 } 3203 } 3204 3205 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3206 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3207 ierr = PetscFree(idxb);CHKERRQ(ierr); 3208 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3214 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3215 { 3216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3217 PetscInt n = A->rmap->n; 3218 PetscInt cstart = A->cmap->rstart; 3219 PetscInt *cmap = mat->garray; 3220 PetscInt *diagIdx, *offdiagIdx; 3221 Vec diagV, offdiagV; 3222 PetscScalar *a, *diagA, *offdiagA; 3223 PetscInt r; 3224 PetscErrorCode ierr; 3225 3226 PetscFunctionBegin; 3227 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3228 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3229 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3230 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3231 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3232 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3233 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3234 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3235 for (r = 0; r < n; ++r) { 3236 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3237 a[r] = diagA[r]; 3238 idx[r] = cstart + diagIdx[r]; 3239 } else { 3240 a[r] = offdiagA[r]; 3241 idx[r] = cmap[offdiagIdx[r]]; 3242 } 3243 } 3244 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3245 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3246 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3247 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3248 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3249 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3255 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3256 { 3257 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3258 PetscInt n = A->rmap->n; 3259 PetscInt cstart = A->cmap->rstart; 3260 PetscInt *cmap = mat->garray; 3261 PetscInt *diagIdx, *offdiagIdx; 3262 Vec diagV, offdiagV; 3263 PetscScalar *a, *diagA, *offdiagA; 3264 PetscInt r; 3265 PetscErrorCode ierr; 3266 3267 PetscFunctionBegin; 3268 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3269 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3270 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3271 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3272 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3273 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3274 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3275 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3276 for (r = 0; r < n; ++r) { 3277 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3278 a[r] = diagA[r]; 3279 idx[r] = cstart + diagIdx[r]; 3280 } else { 3281 a[r] = offdiagA[r]; 3282 idx[r] = cmap[offdiagIdx[r]]; 3283 } 3284 } 3285 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3286 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3287 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3288 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3289 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3290 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3291 PetscFunctionReturn(0); 3292 } 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3296 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3297 { 3298 PetscErrorCode ierr; 3299 Mat *dummy; 3300 3301 PetscFunctionBegin; 3302 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3303 *newmat = *dummy; 3304 ierr = PetscFree(dummy);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 3308 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3312 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3313 { 3314 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3315 PetscErrorCode ierr; 3316 3317 PetscFunctionBegin; 3318 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3319 PetscFunctionReturn(0); 3320 } 3321 3322 #undef __FUNCT__ 3323 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3324 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3325 { 3326 PetscErrorCode ierr; 3327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3328 3329 PetscFunctionBegin; 3330 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3331 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3332 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3333 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 3337 /* -------------------------------------------------------------------*/ 3338 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3339 MatGetRow_MPIAIJ, 3340 MatRestoreRow_MPIAIJ, 3341 MatMult_MPIAIJ, 3342 /* 4*/ MatMultAdd_MPIAIJ, 3343 MatMultTranspose_MPIAIJ, 3344 MatMultTransposeAdd_MPIAIJ, 3345 #if defined(PETSC_HAVE_PBGL) 3346 MatSolve_MPIAIJ, 3347 #else 3348 0, 3349 #endif 3350 0, 3351 0, 3352 /*10*/ 0, 3353 0, 3354 0, 3355 MatSOR_MPIAIJ, 3356 MatTranspose_MPIAIJ, 3357 /*15*/ MatGetInfo_MPIAIJ, 3358 MatEqual_MPIAIJ, 3359 MatGetDiagonal_MPIAIJ, 3360 MatDiagonalScale_MPIAIJ, 3361 MatNorm_MPIAIJ, 3362 /*20*/ MatAssemblyBegin_MPIAIJ, 3363 MatAssemblyEnd_MPIAIJ, 3364 MatSetOption_MPIAIJ, 3365 MatZeroEntries_MPIAIJ, 3366 /*24*/ MatZeroRows_MPIAIJ, 3367 0, 3368 #if defined(PETSC_HAVE_PBGL) 3369 0, 3370 #else 3371 0, 3372 #endif 3373 0, 3374 0, 3375 /*29*/ MatSetUp_MPIAIJ, 3376 #if defined(PETSC_HAVE_PBGL) 3377 0, 3378 #else 3379 0, 3380 #endif 3381 0, 3382 0, 3383 0, 3384 /*34*/ MatDuplicate_MPIAIJ, 3385 0, 3386 0, 3387 0, 3388 0, 3389 /*39*/ MatAXPY_MPIAIJ, 3390 MatGetSubMatrices_MPIAIJ, 3391 MatIncreaseOverlap_MPIAIJ, 3392 MatGetValues_MPIAIJ, 3393 MatCopy_MPIAIJ, 3394 /*44*/ MatGetRowMax_MPIAIJ, 3395 MatScale_MPIAIJ, 3396 0, 3397 0, 3398 MatZeroRowsColumns_MPIAIJ, 3399 /*49*/ MatSetRandom_MPIAIJ, 3400 0, 3401 0, 3402 0, 3403 0, 3404 /*54*/ MatFDColoringCreate_MPIAIJ, 3405 0, 3406 MatSetUnfactored_MPIAIJ, 3407 MatPermute_MPIAIJ, 3408 0, 3409 /*59*/ MatGetSubMatrix_MPIAIJ, 3410 MatDestroy_MPIAIJ, 3411 MatView_MPIAIJ, 3412 0, 3413 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3414 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3415 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3416 0, 3417 0, 3418 0, 3419 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3420 MatGetRowMinAbs_MPIAIJ, 3421 0, 3422 MatSetColoring_MPIAIJ, 3423 0, 3424 MatSetValuesAdifor_MPIAIJ, 3425 /*75*/ MatFDColoringApply_AIJ, 3426 0, 3427 0, 3428 0, 3429 MatFindZeroDiagonals_MPIAIJ, 3430 /*80*/ 0, 3431 0, 3432 0, 3433 /*83*/ MatLoad_MPIAIJ, 3434 0, 3435 0, 3436 0, 3437 0, 3438 0, 3439 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3440 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3441 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3442 MatPtAP_MPIAIJ_MPIAIJ, 3443 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3444 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3445 0, 3446 0, 3447 0, 3448 0, 3449 /*99*/ 0, 3450 0, 3451 0, 3452 MatConjugate_MPIAIJ, 3453 0, 3454 /*104*/MatSetValuesRow_MPIAIJ, 3455 MatRealPart_MPIAIJ, 3456 MatImaginaryPart_MPIAIJ, 3457 0, 3458 0, 3459 /*109*/0, 3460 MatGetRedundantMatrix_MPIAIJ, 3461 MatGetRowMin_MPIAIJ, 3462 0, 3463 0, 3464 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3465 0, 3466 0, 3467 0, 3468 0, 3469 /*119*/0, 3470 0, 3471 0, 3472 0, 3473 MatGetMultiProcBlock_MPIAIJ, 3474 /*124*/MatFindNonzeroRows_MPIAIJ, 3475 MatGetColumnNorms_MPIAIJ, 3476 MatInvertBlockDiagonal_MPIAIJ, 3477 0, 3478 MatGetSubMatricesParallel_MPIAIJ, 3479 /*129*/0, 3480 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3481 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3482 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3483 0, 3484 /*134*/0, 3485 0, 3486 0, 3487 0, 3488 0, 3489 /*139*/0, 3490 0 3491 }; 3492 3493 /* ----------------------------------------------------------------------------------------*/ 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3497 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3498 { 3499 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3500 PetscErrorCode ierr; 3501 3502 PetscFunctionBegin; 3503 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3504 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3510 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3511 { 3512 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3513 PetscErrorCode ierr; 3514 3515 PetscFunctionBegin; 3516 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3517 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3518 PetscFunctionReturn(0); 3519 } 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3523 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3524 { 3525 Mat_MPIAIJ *b; 3526 PetscErrorCode ierr; 3527 3528 PetscFunctionBegin; 3529 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3530 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3531 b = (Mat_MPIAIJ*)B->data; 3532 3533 if (!B->preallocated) { 3534 /* Explicitly create 2 MATSEQAIJ matrices. */ 3535 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3536 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3537 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3538 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3539 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3540 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3541 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3542 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3543 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3544 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3545 } 3546 3547 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3548 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3549 B->preallocated = PETSC_TRUE; 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3555 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3556 { 3557 Mat mat; 3558 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3559 PetscErrorCode ierr; 3560 3561 PetscFunctionBegin; 3562 *newmat = 0; 3563 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3564 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3565 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3566 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3567 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3568 a = (Mat_MPIAIJ*)mat->data; 3569 3570 mat->factortype = matin->factortype; 3571 mat->rmap->bs = matin->rmap->bs; 3572 mat->cmap->bs = matin->cmap->bs; 3573 mat->assembled = PETSC_TRUE; 3574 mat->insertmode = NOT_SET_VALUES; 3575 mat->preallocated = PETSC_TRUE; 3576 3577 a->size = oldmat->size; 3578 a->rank = oldmat->rank; 3579 a->donotstash = oldmat->donotstash; 3580 a->roworiented = oldmat->roworiented; 3581 a->rowindices = 0; 3582 a->rowvalues = 0; 3583 a->getrowactive = PETSC_FALSE; 3584 3585 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3586 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3587 3588 if (oldmat->colmap) { 3589 #if defined(PETSC_USE_CTABLE) 3590 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3591 #else 3592 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3593 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3594 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3595 #endif 3596 } else a->colmap = 0; 3597 if (oldmat->garray) { 3598 PetscInt len; 3599 len = oldmat->B->cmap->n; 3600 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3601 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3602 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3603 } else a->garray = 0; 3604 3605 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3606 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3607 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3609 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3610 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3611 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3612 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3613 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3614 *newmat = mat; 3615 PetscFunctionReturn(0); 3616 } 3617 3618 3619 3620 #undef __FUNCT__ 3621 #define __FUNCT__ "MatLoad_MPIAIJ" 3622 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3623 { 3624 PetscScalar *vals,*svals; 3625 MPI_Comm comm; 3626 PetscErrorCode ierr; 3627 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3628 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3629 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3630 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3631 PetscInt cend,cstart,n,*rowners,sizesset=1; 3632 int fd; 3633 PetscInt bs = 1; 3634 3635 PetscFunctionBegin; 3636 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3637 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3638 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3639 if (!rank) { 3640 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3641 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3642 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3643 } 3644 3645 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3646 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3647 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3648 3649 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3650 3651 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3652 M = header[1]; N = header[2]; 3653 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3654 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3655 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3656 3657 /* If global sizes are set, check if they are consistent with that given in the file */ 3658 if (sizesset) { 3659 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3660 } 3661 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3662 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3663 3664 /* determine ownership of all (block) rows */ 3665 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3666 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3667 else m = newMat->rmap->n; /* Set by user */ 3668 3669 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3670 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3671 3672 /* First process needs enough room for process with most rows */ 3673 if (!rank) { 3674 mmax = rowners[1]; 3675 for (i=2; i<=size; i++) { 3676 mmax = PetscMax(mmax, rowners[i]); 3677 } 3678 } else mmax = -1; /* unused, but compilers complain */ 3679 3680 rowners[0] = 0; 3681 for (i=2; i<=size; i++) { 3682 rowners[i] += rowners[i-1]; 3683 } 3684 rstart = rowners[rank]; 3685 rend = rowners[rank+1]; 3686 3687 /* distribute row lengths to all processors */ 3688 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3689 if (!rank) { 3690 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3691 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3692 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3693 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3694 for (j=0; j<m; j++) { 3695 procsnz[0] += ourlens[j]; 3696 } 3697 for (i=1; i<size; i++) { 3698 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3699 /* calculate the number of nonzeros on each processor */ 3700 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3701 procsnz[i] += rowlengths[j]; 3702 } 3703 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3704 } 3705 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3706 } else { 3707 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3708 } 3709 3710 if (!rank) { 3711 /* determine max buffer needed and allocate it */ 3712 maxnz = 0; 3713 for (i=0; i<size; i++) { 3714 maxnz = PetscMax(maxnz,procsnz[i]); 3715 } 3716 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3717 3718 /* read in my part of the matrix column indices */ 3719 nz = procsnz[0]; 3720 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3721 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3722 3723 /* read in every one elses and ship off */ 3724 for (i=1; i<size; i++) { 3725 nz = procsnz[i]; 3726 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3727 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3728 } 3729 ierr = PetscFree(cols);CHKERRQ(ierr); 3730 } else { 3731 /* determine buffer space needed for message */ 3732 nz = 0; 3733 for (i=0; i<m; i++) { 3734 nz += ourlens[i]; 3735 } 3736 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3737 3738 /* receive message of column indices*/ 3739 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3740 } 3741 3742 /* determine column ownership if matrix is not square */ 3743 if (N != M) { 3744 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3745 else n = newMat->cmap->n; 3746 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3747 cstart = cend - n; 3748 } else { 3749 cstart = rstart; 3750 cend = rend; 3751 n = cend - cstart; 3752 } 3753 3754 /* loop over local rows, determining number of off diagonal entries */ 3755 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3756 jj = 0; 3757 for (i=0; i<m; i++) { 3758 for (j=0; j<ourlens[i]; j++) { 3759 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3760 jj++; 3761 } 3762 } 3763 3764 for (i=0; i<m; i++) { 3765 ourlens[i] -= offlens[i]; 3766 } 3767 if (!sizesset) { 3768 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3769 } 3770 3771 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3772 3773 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3774 3775 for (i=0; i<m; i++) { 3776 ourlens[i] += offlens[i]; 3777 } 3778 3779 if (!rank) { 3780 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3781 3782 /* read in my part of the matrix numerical values */ 3783 nz = procsnz[0]; 3784 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3785 3786 /* insert into matrix */ 3787 jj = rstart; 3788 smycols = mycols; 3789 svals = vals; 3790 for (i=0; i<m; i++) { 3791 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3792 smycols += ourlens[i]; 3793 svals += ourlens[i]; 3794 jj++; 3795 } 3796 3797 /* read in other processors and ship out */ 3798 for (i=1; i<size; i++) { 3799 nz = procsnz[i]; 3800 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3801 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3802 } 3803 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3804 } else { 3805 /* receive numeric values */ 3806 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3807 3808 /* receive message of values*/ 3809 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3810 3811 /* insert into matrix */ 3812 jj = rstart; 3813 smycols = mycols; 3814 svals = vals; 3815 for (i=0; i<m; i++) { 3816 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3817 smycols += ourlens[i]; 3818 svals += ourlens[i]; 3819 jj++; 3820 } 3821 } 3822 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3823 ierr = PetscFree(vals);CHKERRQ(ierr); 3824 ierr = PetscFree(mycols);CHKERRQ(ierr); 3825 ierr = PetscFree(rowners);CHKERRQ(ierr); 3826 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3827 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3828 PetscFunctionReturn(0); 3829 } 3830 3831 #undef __FUNCT__ 3832 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3833 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3834 { 3835 PetscErrorCode ierr; 3836 IS iscol_local; 3837 PetscInt csize; 3838 3839 PetscFunctionBegin; 3840 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3841 if (call == MAT_REUSE_MATRIX) { 3842 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3843 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3844 } else { 3845 PetscInt cbs; 3846 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3847 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3848 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3849 } 3850 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3851 if (call == MAT_INITIAL_MATRIX) { 3852 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3853 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3854 } 3855 PetscFunctionReturn(0); 3856 } 3857 3858 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3859 #undef __FUNCT__ 3860 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3861 /* 3862 Not great since it makes two copies of the submatrix, first an SeqAIJ 3863 in local and then by concatenating the local matrices the end result. 3864 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3865 3866 Note: This requires a sequential iscol with all indices. 3867 */ 3868 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3869 { 3870 PetscErrorCode ierr; 3871 PetscMPIInt rank,size; 3872 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3873 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3874 PetscBool allcolumns, colflag; 3875 Mat M,Mreuse; 3876 MatScalar *vwork,*aa; 3877 MPI_Comm comm; 3878 Mat_SeqAIJ *aij; 3879 3880 PetscFunctionBegin; 3881 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3882 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3884 3885 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3886 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3887 if (colflag && ncol == mat->cmap->N) { 3888 allcolumns = PETSC_TRUE; 3889 } else { 3890 allcolumns = PETSC_FALSE; 3891 } 3892 if (call == MAT_REUSE_MATRIX) { 3893 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3894 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3895 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3896 } else { 3897 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3898 } 3899 3900 /* 3901 m - number of local rows 3902 n - number of columns (same on all processors) 3903 rstart - first row in new global matrix generated 3904 */ 3905 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3906 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3907 if (call == MAT_INITIAL_MATRIX) { 3908 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3909 ii = aij->i; 3910 jj = aij->j; 3911 3912 /* 3913 Determine the number of non-zeros in the diagonal and off-diagonal 3914 portions of the matrix in order to do correct preallocation 3915 */ 3916 3917 /* first get start and end of "diagonal" columns */ 3918 if (csize == PETSC_DECIDE) { 3919 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3920 if (mglobal == n) { /* square matrix */ 3921 nlocal = m; 3922 } else { 3923 nlocal = n/size + ((n % size) > rank); 3924 } 3925 } else { 3926 nlocal = csize; 3927 } 3928 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3929 rstart = rend - nlocal; 3930 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3931 3932 /* next, compute all the lengths */ 3933 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3934 olens = dlens + m; 3935 for (i=0; i<m; i++) { 3936 jend = ii[i+1] - ii[i]; 3937 olen = 0; 3938 dlen = 0; 3939 for (j=0; j<jend; j++) { 3940 if (*jj < rstart || *jj >= rend) olen++; 3941 else dlen++; 3942 jj++; 3943 } 3944 olens[i] = olen; 3945 dlens[i] = dlen; 3946 } 3947 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3948 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3949 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3950 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3951 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3952 ierr = PetscFree(dlens);CHKERRQ(ierr); 3953 } else { 3954 PetscInt ml,nl; 3955 3956 M = *newmat; 3957 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3958 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3959 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3960 /* 3961 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3962 rather than the slower MatSetValues(). 3963 */ 3964 M->was_assembled = PETSC_TRUE; 3965 M->assembled = PETSC_FALSE; 3966 } 3967 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3968 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3969 ii = aij->i; 3970 jj = aij->j; 3971 aa = aij->a; 3972 for (i=0; i<m; i++) { 3973 row = rstart + i; 3974 nz = ii[i+1] - ii[i]; 3975 cwork = jj; jj += nz; 3976 vwork = aa; aa += nz; 3977 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3978 } 3979 3980 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3981 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3982 *newmat = M; 3983 3984 /* save submatrix used in processor for next request */ 3985 if (call == MAT_INITIAL_MATRIX) { 3986 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3987 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3988 } 3989 PetscFunctionReturn(0); 3990 } 3991 3992 #undef __FUNCT__ 3993 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3994 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3995 { 3996 PetscInt m,cstart, cend,j,nnz,i,d; 3997 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3998 const PetscInt *JJ; 3999 PetscScalar *values; 4000 PetscErrorCode ierr; 4001 4002 PetscFunctionBegin; 4003 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 4004 4005 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 4006 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 4007 m = B->rmap->n; 4008 cstart = B->cmap->rstart; 4009 cend = B->cmap->rend; 4010 rstart = B->rmap->rstart; 4011 4012 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 4013 4014 #if defined(PETSC_USE_DEBUGGING) 4015 for (i=0; i<m; i++) { 4016 nnz = Ii[i+1]- Ii[i]; 4017 JJ = J + Ii[i]; 4018 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4019 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 4020 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4021 } 4022 #endif 4023 4024 for (i=0; i<m; i++) { 4025 nnz = Ii[i+1]- Ii[i]; 4026 JJ = J + Ii[i]; 4027 nnz_max = PetscMax(nnz_max,nnz); 4028 d = 0; 4029 for (j=0; j<nnz; j++) { 4030 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4031 } 4032 d_nnz[i] = d; 4033 o_nnz[i] = nnz - d; 4034 } 4035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4036 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4037 4038 if (v) values = (PetscScalar*)v; 4039 else { 4040 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 4041 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 4042 } 4043 4044 for (i=0; i<m; i++) { 4045 ii = i + rstart; 4046 nnz = Ii[i+1]- Ii[i]; 4047 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 4048 } 4049 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4050 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4051 4052 if (!v) { 4053 ierr = PetscFree(values);CHKERRQ(ierr); 4054 } 4055 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4056 PetscFunctionReturn(0); 4057 } 4058 4059 #undef __FUNCT__ 4060 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 4061 /*@ 4062 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4063 (the default parallel PETSc format). 4064 4065 Collective on MPI_Comm 4066 4067 Input Parameters: 4068 + B - the matrix 4069 . i - the indices into j for the start of each local row (starts with zero) 4070 . j - the column indices for each local row (starts with zero) 4071 - v - optional values in the matrix 4072 4073 Level: developer 4074 4075 Notes: 4076 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4077 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4078 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4079 4080 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4081 4082 The format which is used for the sparse matrix input, is equivalent to a 4083 row-major ordering.. i.e for the following matrix, the input data expected is 4084 as shown: 4085 4086 1 0 0 4087 2 0 3 P0 4088 ------- 4089 4 5 6 P1 4090 4091 Process0 [P0]: rows_owned=[0,1] 4092 i = {0,1,3} [size = nrow+1 = 2+1] 4093 j = {0,0,2} [size = nz = 6] 4094 v = {1,2,3} [size = nz = 6] 4095 4096 Process1 [P1]: rows_owned=[2] 4097 i = {0,3} [size = nrow+1 = 1+1] 4098 j = {0,1,2} [size = nz = 6] 4099 v = {4,5,6} [size = nz = 6] 4100 4101 .keywords: matrix, aij, compressed row, sparse, parallel 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4104 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4105 @*/ 4106 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4107 { 4108 PetscErrorCode ierr; 4109 4110 PetscFunctionBegin; 4111 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4112 PetscFunctionReturn(0); 4113 } 4114 4115 #undef __FUNCT__ 4116 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4117 /*@C 4118 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4119 (the default parallel PETSc format). For good matrix assembly performance 4120 the user should preallocate the matrix storage by setting the parameters 4121 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4122 performance can be increased by more than a factor of 50. 4123 4124 Collective on MPI_Comm 4125 4126 Input Parameters: 4127 + A - the matrix 4128 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4129 (same value is used for all local rows) 4130 . d_nnz - array containing the number of nonzeros in the various rows of the 4131 DIAGONAL portion of the local submatrix (possibly different for each row) 4132 or NULL, if d_nz is used to specify the nonzero structure. 4133 The size of this array is equal to the number of local rows, i.e 'm'. 4134 For matrices that will be factored, you must leave room for (and set) 4135 the diagonal entry even if it is zero. 4136 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4137 submatrix (same value is used for all local rows). 4138 - o_nnz - array containing the number of nonzeros in the various rows of the 4139 OFF-DIAGONAL portion of the local submatrix (possibly different for 4140 each row) or NULL, if o_nz is used to specify the nonzero 4141 structure. The size of this array is equal to the number 4142 of local rows, i.e 'm'. 4143 4144 If the *_nnz parameter is given then the *_nz parameter is ignored 4145 4146 The AIJ format (also called the Yale sparse matrix format or 4147 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4148 storage. The stored row and column indices begin with zero. 4149 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4150 4151 The parallel matrix is partitioned such that the first m0 rows belong to 4152 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4153 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4154 4155 The DIAGONAL portion of the local submatrix of a processor can be defined 4156 as the submatrix which is obtained by extraction the part corresponding to 4157 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4158 first row that belongs to the processor, r2 is the last row belonging to 4159 the this processor, and c1-c2 is range of indices of the local part of a 4160 vector suitable for applying the matrix to. This is an mxn matrix. In the 4161 common case of a square matrix, the row and column ranges are the same and 4162 the DIAGONAL part is also square. The remaining portion of the local 4163 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4164 4165 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4166 4167 You can call MatGetInfo() to get information on how effective the preallocation was; 4168 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4169 You can also run with the option -info and look for messages with the string 4170 malloc in them to see if additional memory allocation was needed. 4171 4172 Example usage: 4173 4174 Consider the following 8x8 matrix with 34 non-zero values, that is 4175 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4176 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4177 as follows: 4178 4179 .vb 4180 1 2 0 | 0 3 0 | 0 4 4181 Proc0 0 5 6 | 7 0 0 | 8 0 4182 9 0 10 | 11 0 0 | 12 0 4183 ------------------------------------- 4184 13 0 14 | 15 16 17 | 0 0 4185 Proc1 0 18 0 | 19 20 21 | 0 0 4186 0 0 0 | 22 23 0 | 24 0 4187 ------------------------------------- 4188 Proc2 25 26 27 | 0 0 28 | 29 0 4189 30 0 0 | 31 32 33 | 0 34 4190 .ve 4191 4192 This can be represented as a collection of submatrices as: 4193 4194 .vb 4195 A B C 4196 D E F 4197 G H I 4198 .ve 4199 4200 Where the submatrices A,B,C are owned by proc0, D,E,F are 4201 owned by proc1, G,H,I are owned by proc2. 4202 4203 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4204 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4205 The 'M','N' parameters are 8,8, and have the same values on all procs. 4206 4207 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4208 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4209 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4210 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4211 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4212 matrix, ans [DF] as another SeqAIJ matrix. 4213 4214 When d_nz, o_nz parameters are specified, d_nz storage elements are 4215 allocated for every row of the local diagonal submatrix, and o_nz 4216 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4217 One way to choose d_nz and o_nz is to use the max nonzerors per local 4218 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4219 In this case, the values of d_nz,o_nz are: 4220 .vb 4221 proc0 : dnz = 2, o_nz = 2 4222 proc1 : dnz = 3, o_nz = 2 4223 proc2 : dnz = 1, o_nz = 4 4224 .ve 4225 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4226 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4227 for proc3. i.e we are using 12+15+10=37 storage locations to store 4228 34 values. 4229 4230 When d_nnz, o_nnz parameters are specified, the storage is specified 4231 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4232 In the above case the values for d_nnz,o_nnz are: 4233 .vb 4234 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4235 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4236 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4237 .ve 4238 Here the space allocated is sum of all the above values i.e 34, and 4239 hence pre-allocation is perfect. 4240 4241 Level: intermediate 4242 4243 .keywords: matrix, aij, compressed row, sparse, parallel 4244 4245 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4246 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4247 @*/ 4248 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4249 { 4250 PetscErrorCode ierr; 4251 4252 PetscFunctionBegin; 4253 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4254 PetscValidType(B,1); 4255 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4256 PetscFunctionReturn(0); 4257 } 4258 4259 #undef __FUNCT__ 4260 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4261 /*@ 4262 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4263 CSR format the local rows. 4264 4265 Collective on MPI_Comm 4266 4267 Input Parameters: 4268 + comm - MPI communicator 4269 . m - number of local rows (Cannot be PETSC_DECIDE) 4270 . n - This value should be the same as the local size used in creating the 4271 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4272 calculated if N is given) For square matrices n is almost always m. 4273 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4274 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4275 . i - row indices 4276 . j - column indices 4277 - a - matrix values 4278 4279 Output Parameter: 4280 . mat - the matrix 4281 4282 Level: intermediate 4283 4284 Notes: 4285 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4286 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4287 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4288 4289 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4290 4291 The format which is used for the sparse matrix input, is equivalent to a 4292 row-major ordering.. i.e for the following matrix, the input data expected is 4293 as shown: 4294 4295 1 0 0 4296 2 0 3 P0 4297 ------- 4298 4 5 6 P1 4299 4300 Process0 [P0]: rows_owned=[0,1] 4301 i = {0,1,3} [size = nrow+1 = 2+1] 4302 j = {0,0,2} [size = nz = 6] 4303 v = {1,2,3} [size = nz = 6] 4304 4305 Process1 [P1]: rows_owned=[2] 4306 i = {0,3} [size = nrow+1 = 1+1] 4307 j = {0,1,2} [size = nz = 6] 4308 v = {4,5,6} [size = nz = 6] 4309 4310 .keywords: matrix, aij, compressed row, sparse, parallel 4311 4312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4313 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4314 @*/ 4315 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4316 { 4317 PetscErrorCode ierr; 4318 4319 PetscFunctionBegin; 4320 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4321 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4322 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4323 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4324 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4325 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4326 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4327 PetscFunctionReturn(0); 4328 } 4329 4330 #undef __FUNCT__ 4331 #define __FUNCT__ "MatCreateAIJ" 4332 /*@C 4333 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4334 (the default parallel PETSc format). For good matrix assembly performance 4335 the user should preallocate the matrix storage by setting the parameters 4336 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4337 performance can be increased by more than a factor of 50. 4338 4339 Collective on MPI_Comm 4340 4341 Input Parameters: 4342 + comm - MPI communicator 4343 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4344 This value should be the same as the local size used in creating the 4345 y vector for the matrix-vector product y = Ax. 4346 . n - This value should be the same as the local size used in creating the 4347 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4348 calculated if N is given) For square matrices n is almost always m. 4349 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4350 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4351 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4352 (same value is used for all local rows) 4353 . d_nnz - array containing the number of nonzeros in the various rows of the 4354 DIAGONAL portion of the local submatrix (possibly different for each row) 4355 or NULL, if d_nz is used to specify the nonzero structure. 4356 The size of this array is equal to the number of local rows, i.e 'm'. 4357 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4358 submatrix (same value is used for all local rows). 4359 - o_nnz - array containing the number of nonzeros in the various rows of the 4360 OFF-DIAGONAL portion of the local submatrix (possibly different for 4361 each row) or NULL, if o_nz is used to specify the nonzero 4362 structure. The size of this array is equal to the number 4363 of local rows, i.e 'm'. 4364 4365 Output Parameter: 4366 . A - the matrix 4367 4368 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4369 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4370 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4371 4372 Notes: 4373 If the *_nnz parameter is given then the *_nz parameter is ignored 4374 4375 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4376 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4377 storage requirements for this matrix. 4378 4379 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4380 processor than it must be used on all processors that share the object for 4381 that argument. 4382 4383 The user MUST specify either the local or global matrix dimensions 4384 (possibly both). 4385 4386 The parallel matrix is partitioned across processors such that the 4387 first m0 rows belong to process 0, the next m1 rows belong to 4388 process 1, the next m2 rows belong to process 2 etc.. where 4389 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4390 values corresponding to [m x N] submatrix. 4391 4392 The columns are logically partitioned with the n0 columns belonging 4393 to 0th partition, the next n1 columns belonging to the next 4394 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4395 4396 The DIAGONAL portion of the local submatrix on any given processor 4397 is the submatrix corresponding to the rows and columns m,n 4398 corresponding to the given processor. i.e diagonal matrix on 4399 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4400 etc. The remaining portion of the local submatrix [m x (N-n)] 4401 constitute the OFF-DIAGONAL portion. The example below better 4402 illustrates this concept. 4403 4404 For a square global matrix we define each processor's diagonal portion 4405 to be its local rows and the corresponding columns (a square submatrix); 4406 each processor's off-diagonal portion encompasses the remainder of the 4407 local matrix (a rectangular submatrix). 4408 4409 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4410 4411 When calling this routine with a single process communicator, a matrix of 4412 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4413 type of communicator, use the construction mechanism: 4414 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4415 4416 By default, this format uses inodes (identical nodes) when possible. 4417 We search for consecutive rows with the same nonzero structure, thereby 4418 reusing matrix information to achieve increased efficiency. 4419 4420 Options Database Keys: 4421 + -mat_no_inode - Do not use inodes 4422 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4423 - -mat_aij_oneindex - Internally use indexing starting at 1 4424 rather than 0. Note that when calling MatSetValues(), 4425 the user still MUST index entries starting at 0! 4426 4427 4428 Example usage: 4429 4430 Consider the following 8x8 matrix with 34 non-zero values, that is 4431 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4432 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4433 as follows: 4434 4435 .vb 4436 1 2 0 | 0 3 0 | 0 4 4437 Proc0 0 5 6 | 7 0 0 | 8 0 4438 9 0 10 | 11 0 0 | 12 0 4439 ------------------------------------- 4440 13 0 14 | 15 16 17 | 0 0 4441 Proc1 0 18 0 | 19 20 21 | 0 0 4442 0 0 0 | 22 23 0 | 24 0 4443 ------------------------------------- 4444 Proc2 25 26 27 | 0 0 28 | 29 0 4445 30 0 0 | 31 32 33 | 0 34 4446 .ve 4447 4448 This can be represented as a collection of submatrices as: 4449 4450 .vb 4451 A B C 4452 D E F 4453 G H I 4454 .ve 4455 4456 Where the submatrices A,B,C are owned by proc0, D,E,F are 4457 owned by proc1, G,H,I are owned by proc2. 4458 4459 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4460 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4461 The 'M','N' parameters are 8,8, and have the same values on all procs. 4462 4463 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4464 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4465 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4466 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4467 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4468 matrix, ans [DF] as another SeqAIJ matrix. 4469 4470 When d_nz, o_nz parameters are specified, d_nz storage elements are 4471 allocated for every row of the local diagonal submatrix, and o_nz 4472 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4473 One way to choose d_nz and o_nz is to use the max nonzerors per local 4474 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4475 In this case, the values of d_nz,o_nz are: 4476 .vb 4477 proc0 : dnz = 2, o_nz = 2 4478 proc1 : dnz = 3, o_nz = 2 4479 proc2 : dnz = 1, o_nz = 4 4480 .ve 4481 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4482 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4483 for proc3. i.e we are using 12+15+10=37 storage locations to store 4484 34 values. 4485 4486 When d_nnz, o_nnz parameters are specified, the storage is specified 4487 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4488 In the above case the values for d_nnz,o_nnz are: 4489 .vb 4490 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4491 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4492 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4493 .ve 4494 Here the space allocated is sum of all the above values i.e 34, and 4495 hence pre-allocation is perfect. 4496 4497 Level: intermediate 4498 4499 .keywords: matrix, aij, compressed row, sparse, parallel 4500 4501 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4502 MPIAIJ, MatCreateMPIAIJWithArrays() 4503 @*/ 4504 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4505 { 4506 PetscErrorCode ierr; 4507 PetscMPIInt size; 4508 4509 PetscFunctionBegin; 4510 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4511 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4512 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4513 if (size > 1) { 4514 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4515 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4516 } else { 4517 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4518 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4519 } 4520 PetscFunctionReturn(0); 4521 } 4522 4523 #undef __FUNCT__ 4524 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4525 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4526 { 4527 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4528 4529 PetscFunctionBegin; 4530 *Ad = a->A; 4531 *Ao = a->B; 4532 *colmap = a->garray; 4533 PetscFunctionReturn(0); 4534 } 4535 4536 #undef __FUNCT__ 4537 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4538 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4539 { 4540 PetscErrorCode ierr; 4541 PetscInt i; 4542 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4543 4544 PetscFunctionBegin; 4545 if (coloring->ctype == IS_COLORING_GLOBAL) { 4546 ISColoringValue *allcolors,*colors; 4547 ISColoring ocoloring; 4548 4549 /* set coloring for diagonal portion */ 4550 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4551 4552 /* set coloring for off-diagonal portion */ 4553 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4554 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4555 for (i=0; i<a->B->cmap->n; i++) { 4556 colors[i] = allcolors[a->garray[i]]; 4557 } 4558 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4559 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4560 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4561 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4562 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4563 ISColoringValue *colors; 4564 PetscInt *larray; 4565 ISColoring ocoloring; 4566 4567 /* set coloring for diagonal portion */ 4568 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4569 for (i=0; i<a->A->cmap->n; i++) { 4570 larray[i] = i + A->cmap->rstart; 4571 } 4572 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4573 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4574 for (i=0; i<a->A->cmap->n; i++) { 4575 colors[i] = coloring->colors[larray[i]]; 4576 } 4577 ierr = PetscFree(larray);CHKERRQ(ierr); 4578 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4579 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4580 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4581 4582 /* set coloring for off-diagonal portion */ 4583 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4584 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4585 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4586 for (i=0; i<a->B->cmap->n; i++) { 4587 colors[i] = coloring->colors[larray[i]]; 4588 } 4589 ierr = PetscFree(larray);CHKERRQ(ierr); 4590 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4591 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4592 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4593 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4599 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4600 { 4601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4602 PetscErrorCode ierr; 4603 4604 PetscFunctionBegin; 4605 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4606 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4607 PetscFunctionReturn(0); 4608 } 4609 4610 #undef __FUNCT__ 4611 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4612 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4613 { 4614 PetscErrorCode ierr; 4615 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4616 PetscInt *indx; 4617 4618 PetscFunctionBegin; 4619 /* This routine will ONLY return MPIAIJ type matrix */ 4620 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4621 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4622 if (n == PETSC_DECIDE) { 4623 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4624 } 4625 /* Check sum(n) = N */ 4626 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4627 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4628 4629 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4630 rstart -= m; 4631 4632 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4633 for (i=0; i<m; i++) { 4634 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4635 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4636 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4637 } 4638 4639 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4640 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4641 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4642 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4643 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4644 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4645 PetscFunctionReturn(0); 4646 } 4647 4648 #undef __FUNCT__ 4649 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4650 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4651 { 4652 PetscErrorCode ierr; 4653 PetscInt m,N,i,rstart,nnz,Ii; 4654 PetscInt *indx; 4655 PetscScalar *values; 4656 4657 PetscFunctionBegin; 4658 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4659 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4660 for (i=0; i<m; i++) { 4661 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4662 Ii = i + rstart; 4663 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4664 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4665 } 4666 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4667 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4668 PetscFunctionReturn(0); 4669 } 4670 4671 #undef __FUNCT__ 4672 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4673 /*@ 4674 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4675 matrices from each processor 4676 4677 Collective on MPI_Comm 4678 4679 Input Parameters: 4680 + comm - the communicators the parallel matrix will live on 4681 . inmat - the input sequential matrices 4682 . n - number of local columns (or PETSC_DECIDE) 4683 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4684 4685 Output Parameter: 4686 . outmat - the parallel matrix generated 4687 4688 Level: advanced 4689 4690 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4691 4692 @*/ 4693 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4694 { 4695 PetscErrorCode ierr; 4696 4697 PetscFunctionBegin; 4698 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4699 if (scall == MAT_INITIAL_MATRIX) { 4700 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4701 } 4702 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4703 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4704 PetscFunctionReturn(0); 4705 } 4706 4707 #undef __FUNCT__ 4708 #define __FUNCT__ "MatFileSplit" 4709 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4710 { 4711 PetscErrorCode ierr; 4712 PetscMPIInt rank; 4713 PetscInt m,N,i,rstart,nnz; 4714 size_t len; 4715 const PetscInt *indx; 4716 PetscViewer out; 4717 char *name; 4718 Mat B; 4719 const PetscScalar *values; 4720 4721 PetscFunctionBegin; 4722 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4723 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4724 /* Should this be the type of the diagonal block of A? */ 4725 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4726 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4727 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4728 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4729 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4730 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4731 for (i=0; i<m; i++) { 4732 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4733 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4734 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4735 } 4736 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4737 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4738 4739 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4740 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4741 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4742 sprintf(name,"%s.%d",outfile,rank); 4743 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4744 ierr = PetscFree(name);CHKERRQ(ierr); 4745 ierr = MatView(B,out);CHKERRQ(ierr); 4746 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4747 ierr = MatDestroy(&B);CHKERRQ(ierr); 4748 PetscFunctionReturn(0); 4749 } 4750 4751 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4752 #undef __FUNCT__ 4753 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4754 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4755 { 4756 PetscErrorCode ierr; 4757 Mat_Merge_SeqsToMPI *merge; 4758 PetscContainer container; 4759 4760 PetscFunctionBegin; 4761 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4762 if (container) { 4763 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4764 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4765 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4766 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4767 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4768 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4769 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4770 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4771 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4772 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4773 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4774 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4775 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4776 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4777 ierr = PetscFree(merge);CHKERRQ(ierr); 4778 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4779 } 4780 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4781 PetscFunctionReturn(0); 4782 } 4783 4784 #include <../src/mat/utils/freespace.h> 4785 #include <petscbt.h> 4786 4787 #undef __FUNCT__ 4788 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4789 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4790 { 4791 PetscErrorCode ierr; 4792 MPI_Comm comm; 4793 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4794 PetscMPIInt size,rank,taga,*len_s; 4795 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4796 PetscInt proc,m; 4797 PetscInt **buf_ri,**buf_rj; 4798 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4799 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4800 MPI_Request *s_waits,*r_waits; 4801 MPI_Status *status; 4802 MatScalar *aa=a->a; 4803 MatScalar **abuf_r,*ba_i; 4804 Mat_Merge_SeqsToMPI *merge; 4805 PetscContainer container; 4806 4807 PetscFunctionBegin; 4808 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4809 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4810 4811 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4812 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4813 4814 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4815 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4816 4817 bi = merge->bi; 4818 bj = merge->bj; 4819 buf_ri = merge->buf_ri; 4820 buf_rj = merge->buf_rj; 4821 4822 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4823 owners = merge->rowmap->range; 4824 len_s = merge->len_s; 4825 4826 /* send and recv matrix values */ 4827 /*-----------------------------*/ 4828 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4829 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4830 4831 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4832 for (proc=0,k=0; proc<size; proc++) { 4833 if (!len_s[proc]) continue; 4834 i = owners[proc]; 4835 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4836 k++; 4837 } 4838 4839 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4840 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4841 ierr = PetscFree(status);CHKERRQ(ierr); 4842 4843 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4844 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4845 4846 /* insert mat values of mpimat */ 4847 /*----------------------------*/ 4848 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4849 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4850 4851 for (k=0; k<merge->nrecv; k++) { 4852 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4853 nrows = *(buf_ri_k[k]); 4854 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4855 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4856 } 4857 4858 /* set values of ba */ 4859 m = merge->rowmap->n; 4860 for (i=0; i<m; i++) { 4861 arow = owners[rank] + i; 4862 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4863 bnzi = bi[i+1] - bi[i]; 4864 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4865 4866 /* add local non-zero vals of this proc's seqmat into ba */ 4867 anzi = ai[arow+1] - ai[arow]; 4868 aj = a->j + ai[arow]; 4869 aa = a->a + ai[arow]; 4870 nextaj = 0; 4871 for (j=0; nextaj<anzi; j++) { 4872 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4873 ba_i[j] += aa[nextaj++]; 4874 } 4875 } 4876 4877 /* add received vals into ba */ 4878 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4879 /* i-th row */ 4880 if (i == *nextrow[k]) { 4881 anzi = *(nextai[k]+1) - *nextai[k]; 4882 aj = buf_rj[k] + *(nextai[k]); 4883 aa = abuf_r[k] + *(nextai[k]); 4884 nextaj = 0; 4885 for (j=0; nextaj<anzi; j++) { 4886 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4887 ba_i[j] += aa[nextaj++]; 4888 } 4889 } 4890 nextrow[k]++; nextai[k]++; 4891 } 4892 } 4893 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4894 } 4895 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4896 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4897 4898 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4899 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4900 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4901 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4902 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4903 PetscFunctionReturn(0); 4904 } 4905 4906 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4907 4908 #undef __FUNCT__ 4909 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4910 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4911 { 4912 PetscErrorCode ierr; 4913 Mat B_mpi; 4914 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4915 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4916 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4917 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4918 PetscInt len,proc,*dnz,*onz,bs,cbs; 4919 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4920 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4921 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4922 MPI_Status *status; 4923 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4924 PetscBT lnkbt; 4925 Mat_Merge_SeqsToMPI *merge; 4926 PetscContainer container; 4927 4928 PetscFunctionBegin; 4929 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4930 4931 /* make sure it is a PETSc comm */ 4932 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4933 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4934 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4935 4936 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4937 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4938 4939 /* determine row ownership */ 4940 /*---------------------------------------------------------*/ 4941 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4942 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4943 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4944 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4945 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4946 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4947 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4948 4949 m = merge->rowmap->n; 4950 owners = merge->rowmap->range; 4951 4952 /* determine the number of messages to send, their lengths */ 4953 /*---------------------------------------------------------*/ 4954 len_s = merge->len_s; 4955 4956 len = 0; /* length of buf_si[] */ 4957 merge->nsend = 0; 4958 for (proc=0; proc<size; proc++) { 4959 len_si[proc] = 0; 4960 if (proc == rank) { 4961 len_s[proc] = 0; 4962 } else { 4963 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4964 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4965 } 4966 if (len_s[proc]) { 4967 merge->nsend++; 4968 nrows = 0; 4969 for (i=owners[proc]; i<owners[proc+1]; i++) { 4970 if (ai[i+1] > ai[i]) nrows++; 4971 } 4972 len_si[proc] = 2*(nrows+1); 4973 len += len_si[proc]; 4974 } 4975 } 4976 4977 /* determine the number and length of messages to receive for ij-structure */ 4978 /*-------------------------------------------------------------------------*/ 4979 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4980 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4981 4982 /* post the Irecv of j-structure */ 4983 /*-------------------------------*/ 4984 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4985 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4986 4987 /* post the Isend of j-structure */ 4988 /*--------------------------------*/ 4989 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4990 4991 for (proc=0, k=0; proc<size; proc++) { 4992 if (!len_s[proc]) continue; 4993 i = owners[proc]; 4994 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4995 k++; 4996 } 4997 4998 /* receives and sends of j-structure are complete */ 4999 /*------------------------------------------------*/ 5000 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 5001 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 5002 5003 /* send and recv i-structure */ 5004 /*---------------------------*/ 5005 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 5006 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 5007 5008 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 5009 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 5010 for (proc=0,k=0; proc<size; proc++) { 5011 if (!len_s[proc]) continue; 5012 /* form outgoing message for i-structure: 5013 buf_si[0]: nrows to be sent 5014 [1:nrows]: row index (global) 5015 [nrows+1:2*nrows+1]: i-structure index 5016 */ 5017 /*-------------------------------------------*/ 5018 nrows = len_si[proc]/2 - 1; 5019 buf_si_i = buf_si + nrows+1; 5020 buf_si[0] = nrows; 5021 buf_si_i[0] = 0; 5022 nrows = 0; 5023 for (i=owners[proc]; i<owners[proc+1]; i++) { 5024 anzi = ai[i+1] - ai[i]; 5025 if (anzi) { 5026 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 5027 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 5028 nrows++; 5029 } 5030 } 5031 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 5032 k++; 5033 buf_si += len_si[proc]; 5034 } 5035 5036 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 5037 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 5038 5039 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 5040 for (i=0; i<merge->nrecv; i++) { 5041 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 5042 } 5043 5044 ierr = PetscFree(len_si);CHKERRQ(ierr); 5045 ierr = PetscFree(len_ri);CHKERRQ(ierr); 5046 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 5047 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 5048 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 5049 ierr = PetscFree(buf_s);CHKERRQ(ierr); 5050 ierr = PetscFree(status);CHKERRQ(ierr); 5051 5052 /* compute a local seq matrix in each processor */ 5053 /*----------------------------------------------*/ 5054 /* allocate bi array and free space for accumulating nonzero column info */ 5055 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 5056 bi[0] = 0; 5057 5058 /* create and initialize a linked list */ 5059 nlnk = N+1; 5060 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5061 5062 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5063 len = ai[owners[rank+1]] - ai[owners[rank]]; 5064 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 5065 5066 current_space = free_space; 5067 5068 /* determine symbolic info for each local row */ 5069 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5070 5071 for (k=0; k<merge->nrecv; k++) { 5072 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5073 nrows = *buf_ri_k[k]; 5074 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5075 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5076 } 5077 5078 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5079 len = 0; 5080 for (i=0; i<m; i++) { 5081 bnzi = 0; 5082 /* add local non-zero cols of this proc's seqmat into lnk */ 5083 arow = owners[rank] + i; 5084 anzi = ai[arow+1] - ai[arow]; 5085 aj = a->j + ai[arow]; 5086 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5087 bnzi += nlnk; 5088 /* add received col data into lnk */ 5089 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5090 if (i == *nextrow[k]) { /* i-th row */ 5091 anzi = *(nextai[k]+1) - *nextai[k]; 5092 aj = buf_rj[k] + *nextai[k]; 5093 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5094 bnzi += nlnk; 5095 nextrow[k]++; nextai[k]++; 5096 } 5097 } 5098 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5099 5100 /* if free space is not available, make more free space */ 5101 if (current_space->local_remaining<bnzi) { 5102 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5103 nspacedouble++; 5104 } 5105 /* copy data into free space, then initialize lnk */ 5106 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5107 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5108 5109 current_space->array += bnzi; 5110 current_space->local_used += bnzi; 5111 current_space->local_remaining -= bnzi; 5112 5113 bi[i+1] = bi[i] + bnzi; 5114 } 5115 5116 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5117 5118 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5119 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5120 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5121 5122 /* create symbolic parallel matrix B_mpi */ 5123 /*---------------------------------------*/ 5124 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5125 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5126 if (n==PETSC_DECIDE) { 5127 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5128 } else { 5129 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5130 } 5131 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5132 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5133 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5134 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5135 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5136 5137 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5138 B_mpi->assembled = PETSC_FALSE; 5139 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5140 merge->bi = bi; 5141 merge->bj = bj; 5142 merge->buf_ri = buf_ri; 5143 merge->buf_rj = buf_rj; 5144 merge->coi = NULL; 5145 merge->coj = NULL; 5146 merge->owners_co = NULL; 5147 5148 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5149 5150 /* attach the supporting struct to B_mpi for reuse */ 5151 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5152 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5153 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5154 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5155 *mpimat = B_mpi; 5156 5157 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5158 PetscFunctionReturn(0); 5159 } 5160 5161 #undef __FUNCT__ 5162 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5163 /*@C 5164 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5165 matrices from each processor 5166 5167 Collective on MPI_Comm 5168 5169 Input Parameters: 5170 + comm - the communicators the parallel matrix will live on 5171 . seqmat - the input sequential matrices 5172 . m - number of local rows (or PETSC_DECIDE) 5173 . n - number of local columns (or PETSC_DECIDE) 5174 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5175 5176 Output Parameter: 5177 . mpimat - the parallel matrix generated 5178 5179 Level: advanced 5180 5181 Notes: 5182 The dimensions of the sequential matrix in each processor MUST be the same. 5183 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5184 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5185 @*/ 5186 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5187 { 5188 PetscErrorCode ierr; 5189 PetscMPIInt size; 5190 5191 PetscFunctionBegin; 5192 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5193 if (size == 1) { 5194 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5195 if (scall == MAT_INITIAL_MATRIX) { 5196 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5197 } else { 5198 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5199 } 5200 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5201 PetscFunctionReturn(0); 5202 } 5203 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5204 if (scall == MAT_INITIAL_MATRIX) { 5205 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5206 } 5207 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5208 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5209 PetscFunctionReturn(0); 5210 } 5211 5212 #undef __FUNCT__ 5213 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5214 /*@ 5215 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5216 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5217 with MatGetSize() 5218 5219 Not Collective 5220 5221 Input Parameters: 5222 + A - the matrix 5223 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5224 5225 Output Parameter: 5226 . A_loc - the local sequential matrix generated 5227 5228 Level: developer 5229 5230 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5231 5232 @*/ 5233 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5234 { 5235 PetscErrorCode ierr; 5236 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5237 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5238 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5239 MatScalar *aa=a->a,*ba=b->a,*cam; 5240 PetscScalar *ca; 5241 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5242 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5243 PetscBool match; 5244 5245 PetscFunctionBegin; 5246 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5247 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5248 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5249 if (scall == MAT_INITIAL_MATRIX) { 5250 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5251 ci[0] = 0; 5252 for (i=0; i<am; i++) { 5253 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5254 } 5255 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5256 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5257 k = 0; 5258 for (i=0; i<am; i++) { 5259 ncols_o = bi[i+1] - bi[i]; 5260 ncols_d = ai[i+1] - ai[i]; 5261 /* off-diagonal portion of A */ 5262 for (jo=0; jo<ncols_o; jo++) { 5263 col = cmap[*bj]; 5264 if (col >= cstart) break; 5265 cj[k] = col; bj++; 5266 ca[k++] = *ba++; 5267 } 5268 /* diagonal portion of A */ 5269 for (j=0; j<ncols_d; j++) { 5270 cj[k] = cstart + *aj++; 5271 ca[k++] = *aa++; 5272 } 5273 /* off-diagonal portion of A */ 5274 for (j=jo; j<ncols_o; j++) { 5275 cj[k] = cmap[*bj++]; 5276 ca[k++] = *ba++; 5277 } 5278 } 5279 /* put together the new matrix */ 5280 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5281 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5282 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5283 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5284 mat->free_a = PETSC_TRUE; 5285 mat->free_ij = PETSC_TRUE; 5286 mat->nonew = 0; 5287 } else if (scall == MAT_REUSE_MATRIX) { 5288 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5289 ci = mat->i; cj = mat->j; cam = mat->a; 5290 for (i=0; i<am; i++) { 5291 /* off-diagonal portion of A */ 5292 ncols_o = bi[i+1] - bi[i]; 5293 for (jo=0; jo<ncols_o; jo++) { 5294 col = cmap[*bj]; 5295 if (col >= cstart) break; 5296 *cam++ = *ba++; bj++; 5297 } 5298 /* diagonal portion of A */ 5299 ncols_d = ai[i+1] - ai[i]; 5300 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5301 /* off-diagonal portion of A */ 5302 for (j=jo; j<ncols_o; j++) { 5303 *cam++ = *ba++; bj++; 5304 } 5305 } 5306 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5307 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5308 PetscFunctionReturn(0); 5309 } 5310 5311 #undef __FUNCT__ 5312 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5313 /*@C 5314 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5315 5316 Not Collective 5317 5318 Input Parameters: 5319 + A - the matrix 5320 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5321 - row, col - index sets of rows and columns to extract (or NULL) 5322 5323 Output Parameter: 5324 . A_loc - the local sequential matrix generated 5325 5326 Level: developer 5327 5328 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5329 5330 @*/ 5331 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5332 { 5333 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5334 PetscErrorCode ierr; 5335 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5336 IS isrowa,iscola; 5337 Mat *aloc; 5338 PetscBool match; 5339 5340 PetscFunctionBegin; 5341 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5342 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5343 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5344 if (!row) { 5345 start = A->rmap->rstart; end = A->rmap->rend; 5346 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5347 } else { 5348 isrowa = *row; 5349 } 5350 if (!col) { 5351 start = A->cmap->rstart; 5352 cmap = a->garray; 5353 nzA = a->A->cmap->n; 5354 nzB = a->B->cmap->n; 5355 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5356 ncols = 0; 5357 for (i=0; i<nzB; i++) { 5358 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5359 else break; 5360 } 5361 imark = i; 5362 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5363 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5364 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5365 } else { 5366 iscola = *col; 5367 } 5368 if (scall != MAT_INITIAL_MATRIX) { 5369 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5370 aloc[0] = *A_loc; 5371 } 5372 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5373 *A_loc = aloc[0]; 5374 ierr = PetscFree(aloc);CHKERRQ(ierr); 5375 if (!row) { 5376 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5377 } 5378 if (!col) { 5379 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5380 } 5381 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5382 PetscFunctionReturn(0); 5383 } 5384 5385 #undef __FUNCT__ 5386 #define __FUNCT__ "MatGetBrowsOfAcols" 5387 /*@C 5388 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5389 5390 Collective on Mat 5391 5392 Input Parameters: 5393 + A,B - the matrices in mpiaij format 5394 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5395 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5396 5397 Output Parameter: 5398 + rowb, colb - index sets of rows and columns of B to extract 5399 - B_seq - the sequential matrix generated 5400 5401 Level: developer 5402 5403 @*/ 5404 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5405 { 5406 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5407 PetscErrorCode ierr; 5408 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5409 IS isrowb,iscolb; 5410 Mat *bseq=NULL; 5411 5412 PetscFunctionBegin; 5413 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5414 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5415 } 5416 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5417 5418 if (scall == MAT_INITIAL_MATRIX) { 5419 start = A->cmap->rstart; 5420 cmap = a->garray; 5421 nzA = a->A->cmap->n; 5422 nzB = a->B->cmap->n; 5423 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5424 ncols = 0; 5425 for (i=0; i<nzB; i++) { /* row < local row index */ 5426 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5427 else break; 5428 } 5429 imark = i; 5430 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5431 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5432 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5433 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5434 } else { 5435 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5436 isrowb = *rowb; iscolb = *colb; 5437 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5438 bseq[0] = *B_seq; 5439 } 5440 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5441 *B_seq = bseq[0]; 5442 ierr = PetscFree(bseq);CHKERRQ(ierr); 5443 if (!rowb) { 5444 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5445 } else { 5446 *rowb = isrowb; 5447 } 5448 if (!colb) { 5449 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5450 } else { 5451 *colb = iscolb; 5452 } 5453 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5454 PetscFunctionReturn(0); 5455 } 5456 5457 #undef __FUNCT__ 5458 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5459 /* 5460 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5461 of the OFF-DIAGONAL portion of local A 5462 5463 Collective on Mat 5464 5465 Input Parameters: 5466 + A,B - the matrices in mpiaij format 5467 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5468 5469 Output Parameter: 5470 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5471 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5472 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5473 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5474 5475 Level: developer 5476 5477 */ 5478 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5479 { 5480 VecScatter_MPI_General *gen_to,*gen_from; 5481 PetscErrorCode ierr; 5482 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5483 Mat_SeqAIJ *b_oth; 5484 VecScatter ctx =a->Mvctx; 5485 MPI_Comm comm; 5486 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5487 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5488 PetscScalar *rvalues,*svalues; 5489 MatScalar *b_otha,*bufa,*bufA; 5490 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5491 MPI_Request *rwaits = NULL,*swaits = NULL; 5492 MPI_Status *sstatus,rstatus; 5493 PetscMPIInt jj; 5494 PetscInt *cols,sbs,rbs; 5495 PetscScalar *vals; 5496 5497 PetscFunctionBegin; 5498 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5499 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5500 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5501 } 5502 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5503 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5504 5505 gen_to = (VecScatter_MPI_General*)ctx->todata; 5506 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5507 rvalues = gen_from->values; /* holds the length of receiving row */ 5508 svalues = gen_to->values; /* holds the length of sending row */ 5509 nrecvs = gen_from->n; 5510 nsends = gen_to->n; 5511 5512 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5513 srow = gen_to->indices; /* local row index to be sent */ 5514 sstarts = gen_to->starts; 5515 sprocs = gen_to->procs; 5516 sstatus = gen_to->sstatus; 5517 sbs = gen_to->bs; 5518 rstarts = gen_from->starts; 5519 rprocs = gen_from->procs; 5520 rbs = gen_from->bs; 5521 5522 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5523 if (scall == MAT_INITIAL_MATRIX) { 5524 /* i-array */ 5525 /*---------*/ 5526 /* post receives */ 5527 for (i=0; i<nrecvs; i++) { 5528 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5529 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5530 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5531 } 5532 5533 /* pack the outgoing message */ 5534 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5535 5536 sstartsj[0] = 0; 5537 rstartsj[0] = 0; 5538 len = 0; /* total length of j or a array to be sent */ 5539 k = 0; 5540 for (i=0; i<nsends; i++) { 5541 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5542 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5543 for (j=0; j<nrows; j++) { 5544 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5545 for (l=0; l<sbs; l++) { 5546 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5547 5548 rowlen[j*sbs+l] = ncols; 5549 5550 len += ncols; 5551 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5552 } 5553 k++; 5554 } 5555 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5556 5557 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5558 } 5559 /* recvs and sends of i-array are completed */ 5560 i = nrecvs; 5561 while (i--) { 5562 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5563 } 5564 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5565 5566 /* allocate buffers for sending j and a arrays */ 5567 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5568 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5569 5570 /* create i-array of B_oth */ 5571 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5572 5573 b_othi[0] = 0; 5574 len = 0; /* total length of j or a array to be received */ 5575 k = 0; 5576 for (i=0; i<nrecvs; i++) { 5577 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5578 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5579 for (j=0; j<nrows; j++) { 5580 b_othi[k+1] = b_othi[k] + rowlen[j]; 5581 len += rowlen[j]; k++; 5582 } 5583 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5584 } 5585 5586 /* allocate space for j and a arrrays of B_oth */ 5587 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5588 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5589 5590 /* j-array */ 5591 /*---------*/ 5592 /* post receives of j-array */ 5593 for (i=0; i<nrecvs; i++) { 5594 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5595 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5596 } 5597 5598 /* pack the outgoing message j-array */ 5599 k = 0; 5600 for (i=0; i<nsends; i++) { 5601 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5602 bufJ = bufj+sstartsj[i]; 5603 for (j=0; j<nrows; j++) { 5604 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5605 for (ll=0; ll<sbs; ll++) { 5606 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5607 for (l=0; l<ncols; l++) { 5608 *bufJ++ = cols[l]; 5609 } 5610 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5611 } 5612 } 5613 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5614 } 5615 5616 /* recvs and sends of j-array are completed */ 5617 i = nrecvs; 5618 while (i--) { 5619 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5620 } 5621 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5622 } else if (scall == MAT_REUSE_MATRIX) { 5623 sstartsj = *startsj_s; 5624 rstartsj = *startsj_r; 5625 bufa = *bufa_ptr; 5626 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5627 b_otha = b_oth->a; 5628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5629 5630 /* a-array */ 5631 /*---------*/ 5632 /* post receives of a-array */ 5633 for (i=0; i<nrecvs; i++) { 5634 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5635 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5636 } 5637 5638 /* pack the outgoing message a-array */ 5639 k = 0; 5640 for (i=0; i<nsends; i++) { 5641 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5642 bufA = bufa+sstartsj[i]; 5643 for (j=0; j<nrows; j++) { 5644 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5645 for (ll=0; ll<sbs; ll++) { 5646 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5647 for (l=0; l<ncols; l++) { 5648 *bufA++ = vals[l]; 5649 } 5650 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5651 } 5652 } 5653 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5654 } 5655 /* recvs and sends of a-array are completed */ 5656 i = nrecvs; 5657 while (i--) { 5658 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5659 } 5660 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5661 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5662 5663 if (scall == MAT_INITIAL_MATRIX) { 5664 /* put together the new matrix */ 5665 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5666 5667 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5668 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5669 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5670 b_oth->free_a = PETSC_TRUE; 5671 b_oth->free_ij = PETSC_TRUE; 5672 b_oth->nonew = 0; 5673 5674 ierr = PetscFree(bufj);CHKERRQ(ierr); 5675 if (!startsj_s || !bufa_ptr) { 5676 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5677 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5678 } else { 5679 *startsj_s = sstartsj; 5680 *startsj_r = rstartsj; 5681 *bufa_ptr = bufa; 5682 } 5683 } 5684 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5685 PetscFunctionReturn(0); 5686 } 5687 5688 #undef __FUNCT__ 5689 #define __FUNCT__ "MatGetCommunicationStructs" 5690 /*@C 5691 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5692 5693 Not Collective 5694 5695 Input Parameters: 5696 . A - The matrix in mpiaij format 5697 5698 Output Parameter: 5699 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5700 . colmap - A map from global column index to local index into lvec 5701 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5702 5703 Level: developer 5704 5705 @*/ 5706 #if defined(PETSC_USE_CTABLE) 5707 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5708 #else 5709 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5710 #endif 5711 { 5712 Mat_MPIAIJ *a; 5713 5714 PetscFunctionBegin; 5715 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5716 PetscValidPointer(lvec, 2); 5717 PetscValidPointer(colmap, 3); 5718 PetscValidPointer(multScatter, 4); 5719 a = (Mat_MPIAIJ*) A->data; 5720 if (lvec) *lvec = a->lvec; 5721 if (colmap) *colmap = a->colmap; 5722 if (multScatter) *multScatter = a->Mvctx; 5723 PetscFunctionReturn(0); 5724 } 5725 5726 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5727 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5728 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5729 5730 #undef __FUNCT__ 5731 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5732 /* 5733 Computes (B'*A')' since computing B*A directly is untenable 5734 5735 n p p 5736 ( ) ( ) ( ) 5737 m ( A ) * n ( B ) = m ( C ) 5738 ( ) ( ) ( ) 5739 5740 */ 5741 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5742 { 5743 PetscErrorCode ierr; 5744 Mat At,Bt,Ct; 5745 5746 PetscFunctionBegin; 5747 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5748 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5749 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5750 ierr = MatDestroy(&At);CHKERRQ(ierr); 5751 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5752 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5753 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5754 PetscFunctionReturn(0); 5755 } 5756 5757 #undef __FUNCT__ 5758 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5759 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5760 { 5761 PetscErrorCode ierr; 5762 PetscInt m=A->rmap->n,n=B->cmap->n; 5763 Mat Cmat; 5764 5765 PetscFunctionBegin; 5766 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5767 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5768 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5769 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5770 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5771 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5772 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 5775 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5776 5777 *C = Cmat; 5778 PetscFunctionReturn(0); 5779 } 5780 5781 /* ----------------------------------------------------------------*/ 5782 #undef __FUNCT__ 5783 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5784 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5785 { 5786 PetscErrorCode ierr; 5787 5788 PetscFunctionBegin; 5789 if (scall == MAT_INITIAL_MATRIX) { 5790 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5791 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5792 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5793 } 5794 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5795 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5796 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5797 PetscFunctionReturn(0); 5798 } 5799 5800 #if defined(PETSC_HAVE_MUMPS) 5801 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5802 #endif 5803 #if defined(PETSC_HAVE_PASTIX) 5804 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5805 #endif 5806 #if defined(PETSC_HAVE_SUPERLU_DIST) 5807 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5808 #endif 5809 #if defined(PETSC_HAVE_CLIQUE) 5810 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5811 #endif 5812 5813 /*MC 5814 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5815 5816 Options Database Keys: 5817 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5818 5819 Level: beginner 5820 5821 .seealso: MatCreateAIJ() 5822 M*/ 5823 5824 #undef __FUNCT__ 5825 #define __FUNCT__ "MatCreate_MPIAIJ" 5826 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5827 { 5828 Mat_MPIAIJ *b; 5829 PetscErrorCode ierr; 5830 PetscMPIInt size; 5831 5832 PetscFunctionBegin; 5833 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5834 5835 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5836 B->data = (void*)b; 5837 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5838 B->assembled = PETSC_FALSE; 5839 B->insertmode = NOT_SET_VALUES; 5840 b->size = size; 5841 5842 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5843 5844 /* build cache for off array entries formed */ 5845 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5846 5847 b->donotstash = PETSC_FALSE; 5848 b->colmap = 0; 5849 b->garray = 0; 5850 b->roworiented = PETSC_TRUE; 5851 5852 /* stuff used for matrix vector multiply */ 5853 b->lvec = NULL; 5854 b->Mvctx = NULL; 5855 5856 /* stuff for MatGetRow() */ 5857 b->rowindices = 0; 5858 b->rowvalues = 0; 5859 b->getrowactive = PETSC_FALSE; 5860 5861 /* flexible pointer used in CUSP/CUSPARSE classes */ 5862 b->spptr = NULL; 5863 5864 #if defined(PETSC_HAVE_MUMPS) 5865 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5866 #endif 5867 #if defined(PETSC_HAVE_PASTIX) 5868 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5869 #endif 5870 #if defined(PETSC_HAVE_SUPERLU_DIST) 5871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5872 #endif 5873 #if defined(PETSC_HAVE_CLIQUE) 5874 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5875 #endif 5876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5877 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5878 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5880 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5881 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5882 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5884 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5885 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5888 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5889 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5890 PetscFunctionReturn(0); 5891 } 5892 5893 #undef __FUNCT__ 5894 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5895 /*@ 5896 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5897 and "off-diagonal" part of the matrix in CSR format. 5898 5899 Collective on MPI_Comm 5900 5901 Input Parameters: 5902 + comm - MPI communicator 5903 . m - number of local rows (Cannot be PETSC_DECIDE) 5904 . n - This value should be the same as the local size used in creating the 5905 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5906 calculated if N is given) For square matrices n is almost always m. 5907 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5908 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5909 . i - row indices for "diagonal" portion of matrix 5910 . j - column indices 5911 . a - matrix values 5912 . oi - row indices for "off-diagonal" portion of matrix 5913 . oj - column indices 5914 - oa - matrix values 5915 5916 Output Parameter: 5917 . mat - the matrix 5918 5919 Level: advanced 5920 5921 Notes: 5922 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5923 must free the arrays once the matrix has been destroyed and not before. 5924 5925 The i and j indices are 0 based 5926 5927 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5928 5929 This sets local rows and cannot be used to set off-processor values. 5930 5931 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5932 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5933 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5934 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5935 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5936 communication if it is known that only local entries will be set. 5937 5938 .keywords: matrix, aij, compressed row, sparse, parallel 5939 5940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5941 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5942 @*/ 5943 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5944 { 5945 PetscErrorCode ierr; 5946 Mat_MPIAIJ *maij; 5947 5948 PetscFunctionBegin; 5949 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5950 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5951 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5952 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5953 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5954 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5955 maij = (Mat_MPIAIJ*) (*mat)->data; 5956 5957 (*mat)->preallocated = PETSC_TRUE; 5958 5959 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5960 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5961 5962 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5963 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5964 5965 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5966 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5967 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5968 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5969 5970 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5971 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5972 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5973 PetscFunctionReturn(0); 5974 } 5975 5976 /* 5977 Special version for direct calls from Fortran 5978 */ 5979 #include <petsc-private/fortranimpl.h> 5980 5981 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5982 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5983 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5984 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5985 #endif 5986 5987 /* Change these macros so can be used in void function */ 5988 #undef CHKERRQ 5989 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5990 #undef SETERRQ2 5991 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5992 #undef SETERRQ3 5993 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5994 #undef SETERRQ 5995 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5996 5997 #undef __FUNCT__ 5998 #define __FUNCT__ "matsetvaluesmpiaij_" 5999 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6000 { 6001 Mat mat = *mmat; 6002 PetscInt m = *mm, n = *mn; 6003 InsertMode addv = *maddv; 6004 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6005 PetscScalar value; 6006 PetscErrorCode ierr; 6007 6008 MatCheckPreallocated(mat,1); 6009 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6010 6011 #if defined(PETSC_USE_DEBUG) 6012 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6013 #endif 6014 { 6015 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6016 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6017 PetscBool roworiented = aij->roworiented; 6018 6019 /* Some Variables required in the macro */ 6020 Mat A = aij->A; 6021 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6022 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6023 MatScalar *aa = a->a; 6024 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6025 Mat B = aij->B; 6026 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6027 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6028 MatScalar *ba = b->a; 6029 6030 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6031 PetscInt nonew = a->nonew; 6032 MatScalar *ap1,*ap2; 6033 6034 PetscFunctionBegin; 6035 for (i=0; i<m; i++) { 6036 if (im[i] < 0) continue; 6037 #if defined(PETSC_USE_DEBUG) 6038 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6039 #endif 6040 if (im[i] >= rstart && im[i] < rend) { 6041 row = im[i] - rstart; 6042 lastcol1 = -1; 6043 rp1 = aj + ai[row]; 6044 ap1 = aa + ai[row]; 6045 rmax1 = aimax[row]; 6046 nrow1 = ailen[row]; 6047 low1 = 0; 6048 high1 = nrow1; 6049 lastcol2 = -1; 6050 rp2 = bj + bi[row]; 6051 ap2 = ba + bi[row]; 6052 rmax2 = bimax[row]; 6053 nrow2 = bilen[row]; 6054 low2 = 0; 6055 high2 = nrow2; 6056 6057 for (j=0; j<n; j++) { 6058 if (roworiented) value = v[i*n+j]; 6059 else value = v[i+j*m]; 6060 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 6061 if (in[j] >= cstart && in[j] < cend) { 6062 col = in[j] - cstart; 6063 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 6064 } else if (in[j] < 0) continue; 6065 #if defined(PETSC_USE_DEBUG) 6066 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6067 #endif 6068 else { 6069 if (mat->was_assembled) { 6070 if (!aij->colmap) { 6071 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6072 } 6073 #if defined(PETSC_USE_CTABLE) 6074 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6075 col--; 6076 #else 6077 col = aij->colmap[in[j]] - 1; 6078 #endif 6079 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6080 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6081 col = in[j]; 6082 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6083 B = aij->B; 6084 b = (Mat_SeqAIJ*)B->data; 6085 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6086 rp2 = bj + bi[row]; 6087 ap2 = ba + bi[row]; 6088 rmax2 = bimax[row]; 6089 nrow2 = bilen[row]; 6090 low2 = 0; 6091 high2 = nrow2; 6092 bm = aij->B->rmap->n; 6093 ba = b->a; 6094 } 6095 } else col = in[j]; 6096 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6097 } 6098 } 6099 } else if (!aij->donotstash) { 6100 if (roworiented) { 6101 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6102 } else { 6103 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6104 } 6105 } 6106 } 6107 } 6108 PetscFunctionReturnVoid(); 6109 } 6110 6111