1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 if (redund->matseq) { /* '-new' option */ 2530 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2531 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2532 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2534 } else { 2535 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2537 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2538 for (i=0; i<redund->nrecvs; i++) { 2539 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2541 } 2542 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2543 2544 if (redund->psubcomm) { 2545 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2546 } 2547 } 2548 2549 ierr = redund->Destroy(A);CHKERRQ(ierr); 2550 ierr = PetscFree(redund);CHKERRQ(ierr); 2551 } 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2557 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2558 { 2559 PetscMPIInt rank,size; 2560 MPI_Comm comm,subcomm=psubcomm->comm; 2561 PetscErrorCode ierr; 2562 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2563 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2564 PetscInt *rowrange = mat->rmap->range; 2565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2566 Mat A = aij->A,B=aij->B,C=*matredundant; 2567 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2568 PetscScalar *sbuf_a; 2569 PetscInt nzlocal=a->nz+b->nz; 2570 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2571 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2572 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2573 MatScalar *aworkA,*aworkB; 2574 PetscScalar *vals; 2575 PetscMPIInt tag1,tag2,tag3,imdex; 2576 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2577 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2578 MPI_Status recv_status,*send_status; 2579 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2580 PetscInt **rbuf_j=NULL; 2581 PetscScalar **rbuf_a=NULL; 2582 Mat_Redundant *redund =NULL; 2583 PetscBool flg=PETSC_FALSE; 2584 2585 PetscFunctionBegin; 2586 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2589 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2590 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2591 2592 /* ---------- new imples: use MatGetSubMatrices() ------------*/ 2593 ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr); 2594 if (flg) { 2595 Mat *matseq; 2596 IS isrow,iscol; 2597 PetscInt mloc_sub,rstart,rend; 2598 2599 if (reuse == MAT_INITIAL_MATRIX) { 2600 /* create a local sequential matrix matseq[0] */ 2601 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2602 ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2603 ierr = MatSetUp(C);CHKERRQ(ierr); 2604 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2605 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2606 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2607 2608 ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr); 2609 ierr = MatDestroy(&C);CHKERRQ(ierr); 2610 mloc_sub = rend - rstart; 2611 2612 /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */ 2613 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2614 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2615 } else { /* reuse == MAT_REUSE_MATRIX */ 2616 if (subsize == 1) { 2617 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2618 redund = c->redundant; 2619 } else { 2620 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2621 redund = c->redundant; 2622 } 2623 2624 isrow = redund->isrow; 2625 iscol = redund->iscol; 2626 matseq = redund->matseq; 2627 } 2628 2629 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2630 /* 2631 if (rank==0) { 2632 printf("[%d] matsub:\n",rank); 2633 ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2634 } 2635 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 2636 */ 2637 2638 /* Create matredundant by concatenating matseq[0] from processors in this subcomm */ 2639 /* 2640 if (reuse == MAT_REUSE_MATRIX) { 2641 if (!rank) printf("matredundant:\n"); 2642 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2643 } 2644 */ 2645 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2646 /* 2647 if (nsubcomm == 1) { 2648 if (!rank) printf( "matredundant\n"); 2649 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2650 } */ 2651 2652 if (reuse == MAT_INITIAL_MATRIX) { 2653 /* create a supporting struct and attach it to C for reuse */ 2654 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2655 if (subsize == 1) { 2656 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2657 c->redundant = redund; 2658 } else { 2659 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2660 c->redundant = redund; 2661 } 2662 2663 redund->isrow = isrow; 2664 redund->iscol = iscol; 2665 redund->matseq = matseq; 2666 redund->psubcomm = NULL; 2667 2668 redund->Destroy = (*matredundant)->ops->destroy; 2669 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2670 } 2671 PetscFunctionReturn(0); 2672 } 2673 /* ----------------------------------------------------*/ 2674 2675 if (reuse == MAT_REUSE_MATRIX) { 2676 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2677 if (subsize == 1) { 2678 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2679 redund = c->redundant; 2680 } else { 2681 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2682 redund = c->redundant; 2683 } 2684 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2685 2686 nsends = redund->nsends; 2687 nrecvs = redund->nrecvs; 2688 send_rank = redund->send_rank; 2689 recv_rank = redund->recv_rank; 2690 sbuf_nz = redund->sbuf_nz; 2691 rbuf_nz = redund->rbuf_nz; 2692 sbuf_j = redund->sbuf_j; 2693 sbuf_a = redund->sbuf_a; 2694 rbuf_j = redund->rbuf_j; 2695 rbuf_a = redund->rbuf_a; 2696 } 2697 2698 if (reuse == MAT_INITIAL_MATRIX) { 2699 PetscInt nleftover,np_subcomm; 2700 2701 /* get the destination processors' id send_rank, nsends and nrecvs */ 2702 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2703 2704 np_subcomm = size/nsubcomm; 2705 nleftover = size - nsubcomm*np_subcomm; 2706 2707 nsends = 0; nrecvs = 0; 2708 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2709 /* -------------------------------------------*/ 2710 for (i=0; i<size; i++) { 2711 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2712 send_rank[nsends] = i; nsends++; 2713 recv_rank[nrecvs++] = i; 2714 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2715 } 2716 } 2717 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2718 i = size-nleftover-1; 2719 j = 0; 2720 while (j < nsubcomm - nleftover) { 2721 send_rank[nsends++] = i; 2722 i--; j++; 2723 /* printf("[%d] send to [%d]\n",rank,i); */ 2724 } 2725 } 2726 2727 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2728 for (i=0; i<nleftover; i++) { 2729 recv_rank[nrecvs++] = size-nleftover+i; 2730 /* printf("[%d] recv from [%d]\n",rank,i); */ 2731 } 2732 } 2733 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2734 /* --------------------------------------------------*/ 2735 /* ---------- new imples: use MatGetSubMatrices() ------------*/ 2736 PetscBool flg=PETSC_FALSE; 2737 ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr); 2738 if (flg) { 2739 Mat *matseq; 2740 IS isrow,iscol; 2741 PetscInt mloc_sub,rstart,rend; 2742 2743 /* create redundant matrix */ 2744 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2745 ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2746 ierr = MatSetUp(C);CHKERRQ(ierr); 2747 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2748 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2749 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 2751 ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr); 2752 mloc_sub = rend - rstart; 2753 2754 printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); 2755 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2756 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2757 2758 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&matseq);CHKERRQ(ierr); 2759 if (rank==1) { 2760 printf("[%d] matsub:\n",rank); 2761 ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2762 } 2763 2764 /* Create redundant matrix by concatenating local sequential 2765 matrices from processors in this subcomm */ 2766 ierr = MatDestroy(&C);CHKERRQ(ierr); 2767 2768 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); 2769 *matredundant = C; 2770 2771 if (nsubcomm == 1) { 2772 if (!rank) printf( "C\n"); 2773 ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2774 } 2775 2776 ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr); 2777 ierr = PetscFree(matseq);CHKERRQ(ierr); 2778 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 2779 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 2780 //ierr = MPI_Barrier(comm);CHKERRQ(ierr); 2781 //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No done yet"); 2782 PetscFunctionReturn(0); 2783 } 2784 2785 /*---------------------------------- */ 2786 PetscInt color,subcommstart; 2787 subcommstart=0; 2788 for (color=0; color<nsubcomm; color++) { 2789 if (psubcomm->color != color) { 2790 for (i=0; i<psubcomm->subsize[color]; i++) { 2791 if (subrank == i) { /* my_subrank == other's subrank */ 2792 send_rank[nsends++] = subcommstart+i; 2793 recv_rank[nrecvs++] = subcommstart+i; 2794 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2795 } 2796 } 2797 } 2798 subcommstart += psubcomm->subsize[color]; 2799 } 2800 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2801 subcommstart=0; 2802 for (color=0; color<nsubcomm; color++) { 2803 subcommstart += psubcomm->subsize[color]; 2804 if (psubcomm->color == color) continue; 2805 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2806 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2807 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2808 } 2809 } 2810 } 2811 2812 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2813 subcommstart=0; 2814 for (color=0; color<nsubcomm; color++) { 2815 subcommstart += psubcomm->subsize[color]; 2816 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2817 recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */ 2818 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2819 } 2820 } 2821 } 2822 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type); 2823 2824 /* allocate sbuf_j, sbuf_a */ 2825 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2826 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2827 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2828 /* 2829 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2830 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2831 */ 2832 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2833 2834 /* copy mat's local entries into the buffers */ 2835 if (reuse == MAT_INITIAL_MATRIX) { 2836 rownz_max = 0; 2837 rptr = sbuf_j; 2838 cols = sbuf_j + rend-rstart + 1; 2839 vals = sbuf_a; 2840 rptr[0] = 0; 2841 for (i=0; i<rend-rstart; i++) { 2842 row = i + rstart; 2843 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2844 ncols = nzA + nzB; 2845 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2846 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2847 /* load the column indices for this row into cols */ 2848 lwrite = 0; 2849 for (l=0; l<nzB; l++) { 2850 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2851 vals[lwrite] = aworkB[l]; 2852 cols[lwrite++] = ctmp; 2853 } 2854 } 2855 for (l=0; l<nzA; l++) { 2856 vals[lwrite] = aworkA[l]; 2857 cols[lwrite++] = cstart + cworkA[l]; 2858 } 2859 for (l=0; l<nzB; l++) { 2860 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2861 vals[lwrite] = aworkB[l]; 2862 cols[lwrite++] = ctmp; 2863 } 2864 } 2865 vals += ncols; 2866 cols += ncols; 2867 rptr[i+1] = rptr[i] + ncols; 2868 if (rownz_max < ncols) rownz_max = ncols; 2869 } 2870 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2871 } else { /* only copy matrix values into sbuf_a */ 2872 rptr = sbuf_j; 2873 vals = sbuf_a; 2874 rptr[0] = 0; 2875 for (i=0; i<rend-rstart; i++) { 2876 row = i + rstart; 2877 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2878 ncols = nzA + nzB; 2879 cworkB = b->j + b->i[i]; 2880 aworkA = a->a + a->i[i]; 2881 aworkB = b->a + b->i[i]; 2882 lwrite = 0; 2883 for (l=0; l<nzB; l++) { 2884 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2885 } 2886 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2887 for (l=0; l<nzB; l++) { 2888 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2889 } 2890 vals += ncols; 2891 rptr[i+1] = rptr[i] + ncols; 2892 } 2893 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2894 2895 /* send nzlocal to others, and recv other's nzlocal */ 2896 /*--------------------------------------------------*/ 2897 if (reuse == MAT_INITIAL_MATRIX) { 2898 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2899 2900 s_waits2 = s_waits3 + nsends; 2901 s_waits1 = s_waits2 + nsends; 2902 r_waits1 = s_waits1 + nsends; 2903 r_waits2 = r_waits1 + nrecvs; 2904 r_waits3 = r_waits2 + nrecvs; 2905 } else { 2906 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2907 2908 r_waits3 = s_waits3 + nsends; 2909 } 2910 2911 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2912 if (reuse == MAT_INITIAL_MATRIX) { 2913 /* get new tags to keep the communication clean */ 2914 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2915 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2916 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2917 2918 /* post receives of other's nzlocal */ 2919 for (i=0; i<nrecvs; i++) { 2920 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2921 } 2922 /* send nzlocal to others */ 2923 for (i=0; i<nsends; i++) { 2924 sbuf_nz[i] = nzlocal; 2925 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2926 } 2927 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2928 count = nrecvs; 2929 while (count) { 2930 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2931 2932 recv_rank[imdex] = recv_status.MPI_SOURCE; 2933 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2934 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2935 2936 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2937 2938 rbuf_nz[imdex] += i + 2; 2939 2940 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2941 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2942 count--; 2943 } 2944 /* wait on sends of nzlocal */ 2945 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2946 /* send mat->i,j to others, and recv from other's */ 2947 /*------------------------------------------------*/ 2948 for (i=0; i<nsends; i++) { 2949 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2950 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2951 } 2952 /* wait on receives of mat->i,j */ 2953 /*------------------------------*/ 2954 count = nrecvs; 2955 while (count) { 2956 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2957 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2958 count--; 2959 } 2960 /* wait on sends of mat->i,j */ 2961 /*---------------------------*/ 2962 if (nsends) { 2963 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2964 } 2965 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2966 2967 /* post receives, send and receive mat->a */ 2968 /*----------------------------------------*/ 2969 for (imdex=0; imdex<nrecvs; imdex++) { 2970 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2971 } 2972 for (i=0; i<nsends; i++) { 2973 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2974 } 2975 count = nrecvs; 2976 while (count) { 2977 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2978 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2979 count--; 2980 } 2981 if (nsends) { 2982 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2983 } 2984 2985 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2986 2987 /* create redundant matrix */ 2988 /*-------------------------*/ 2989 if (reuse == MAT_INITIAL_MATRIX) { 2990 const PetscInt *range; 2991 PetscInt rstart_sub,rend_sub,mloc_sub; 2992 2993 /* compute rownz_max for preallocation */ 2994 for (imdex=0; imdex<nrecvs; imdex++) { 2995 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2996 rptr = rbuf_j[imdex]; 2997 for (i=0; i<j; i++) { 2998 ncols = rptr[i+1] - rptr[i]; 2999 if (rownz_max < ncols) rownz_max = ncols; 3000 } 3001 } 3002 3003 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 3004 3005 /* get local size of redundant matrix 3006 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 3007 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 3008 rstart_sub = range[nsubcomm*subrank]; 3009 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 3010 rend_sub = range[nsubcomm*(subrank+1)]; 3011 } else { 3012 rend_sub = mat->rmap->N; 3013 } 3014 mloc_sub = rend_sub - rstart_sub; 3015 3016 if (M == N) { 3017 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3018 } else { /* non-square matrix */ 3019 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 3020 } 3021 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 3022 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 3023 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 3024 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 3025 } else { 3026 C = *matredundant; 3027 } 3028 3029 /* insert local matrix entries */ 3030 rptr = sbuf_j; 3031 cols = sbuf_j + rend-rstart + 1; 3032 vals = sbuf_a; 3033 for (i=0; i<rend-rstart; i++) { 3034 row = i + rstart; 3035 ncols = rptr[i+1] - rptr[i]; 3036 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3037 vals += ncols; 3038 cols += ncols; 3039 } 3040 /* insert received matrix entries */ 3041 for (imdex=0; imdex<nrecvs; imdex++) { 3042 rstart = rowrange[recv_rank[imdex]]; 3043 rend = rowrange[recv_rank[imdex]+1]; 3044 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 3045 rptr = rbuf_j[imdex]; 3046 cols = rbuf_j[imdex] + rend-rstart + 1; 3047 vals = rbuf_a[imdex]; 3048 for (i=0; i<rend-rstart; i++) { 3049 row = i + rstart; 3050 ncols = rptr[i+1] - rptr[i]; 3051 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3052 vals += ncols; 3053 cols += ncols; 3054 } 3055 } 3056 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 3059 if (reuse == MAT_INITIAL_MATRIX) { 3060 *matredundant = C; 3061 3062 /* create a supporting struct and attach it to C for reuse */ 3063 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 3064 if (subsize == 1) { 3065 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 3066 c->redundant = redund; 3067 } else { 3068 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 3069 c->redundant = redund; 3070 } 3071 3072 redund->nzlocal = nzlocal; 3073 redund->nsends = nsends; 3074 redund->nrecvs = nrecvs; 3075 redund->send_rank = send_rank; 3076 redund->recv_rank = recv_rank; 3077 redund->sbuf_nz = sbuf_nz; 3078 redund->rbuf_nz = rbuf_nz; 3079 redund->sbuf_j = sbuf_j; 3080 redund->sbuf_a = sbuf_a; 3081 redund->rbuf_j = rbuf_j; 3082 redund->rbuf_a = rbuf_a; 3083 redund->psubcomm = NULL; 3084 3085 redund->Destroy = C->ops->destroy; 3086 C->ops->destroy = MatDestroy_MatRedundant; 3087 } 3088 PetscFunctionReturn(0); 3089 } 3090 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 3093 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 3094 { 3095 PetscErrorCode ierr; 3096 3097 PetscFunctionBegin; 3098 if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */ 3099 MPI_Comm comm; 3100 PetscSubcomm psubcomm; 3101 PetscMPIInt size,subsize; 3102 3103 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3105 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 3106 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 3107 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); 3108 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 3109 3110 ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 3111 3112 /* free psubcomm in MatDestroy_MatRedundant() */ 3113 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 3114 if (subsize == 1) { 3115 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3116 c->redundant->psubcomm = psubcomm; 3117 } else { 3118 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3119 c->redundant->psubcomm = psubcomm ; 3120 } 3121 } else { 3122 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet"); 3123 } 3124 PetscFunctionReturn(0); 3125 } 3126 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3129 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3130 { 3131 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3132 PetscErrorCode ierr; 3133 PetscInt i,*idxb = 0; 3134 PetscScalar *va,*vb; 3135 Vec vtmp; 3136 3137 PetscFunctionBegin; 3138 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3139 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3140 if (idx) { 3141 for (i=0; i<A->rmap->n; i++) { 3142 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3143 } 3144 } 3145 3146 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3147 if (idx) { 3148 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3149 } 3150 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3151 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3152 3153 for (i=0; i<A->rmap->n; i++) { 3154 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3155 va[i] = vb[i]; 3156 if (idx) idx[i] = a->garray[idxb[i]]; 3157 } 3158 } 3159 3160 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3161 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3162 ierr = PetscFree(idxb);CHKERRQ(ierr); 3163 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3164 PetscFunctionReturn(0); 3165 } 3166 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3169 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3170 { 3171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3172 PetscErrorCode ierr; 3173 PetscInt i,*idxb = 0; 3174 PetscScalar *va,*vb; 3175 Vec vtmp; 3176 3177 PetscFunctionBegin; 3178 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3179 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3180 if (idx) { 3181 for (i=0; i<A->cmap->n; i++) { 3182 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3183 } 3184 } 3185 3186 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3187 if (idx) { 3188 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3189 } 3190 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3191 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3192 3193 for (i=0; i<A->rmap->n; i++) { 3194 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3195 va[i] = vb[i]; 3196 if (idx) idx[i] = a->garray[idxb[i]]; 3197 } 3198 } 3199 3200 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3201 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3202 ierr = PetscFree(idxb);CHKERRQ(ierr); 3203 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3209 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3210 { 3211 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3212 PetscInt n = A->rmap->n; 3213 PetscInt cstart = A->cmap->rstart; 3214 PetscInt *cmap = mat->garray; 3215 PetscInt *diagIdx, *offdiagIdx; 3216 Vec diagV, offdiagV; 3217 PetscScalar *a, *diagA, *offdiagA; 3218 PetscInt r; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3223 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3224 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3225 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3226 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3227 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3228 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3229 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3230 for (r = 0; r < n; ++r) { 3231 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3232 a[r] = diagA[r]; 3233 idx[r] = cstart + diagIdx[r]; 3234 } else { 3235 a[r] = offdiagA[r]; 3236 idx[r] = cmap[offdiagIdx[r]]; 3237 } 3238 } 3239 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3240 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3241 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3242 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3243 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3244 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3245 PetscFunctionReturn(0); 3246 } 3247 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3250 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3251 { 3252 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3253 PetscInt n = A->rmap->n; 3254 PetscInt cstart = A->cmap->rstart; 3255 PetscInt *cmap = mat->garray; 3256 PetscInt *diagIdx, *offdiagIdx; 3257 Vec diagV, offdiagV; 3258 PetscScalar *a, *diagA, *offdiagA; 3259 PetscInt r; 3260 PetscErrorCode ierr; 3261 3262 PetscFunctionBegin; 3263 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3264 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3265 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3266 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3267 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3268 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3269 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3270 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3271 for (r = 0; r < n; ++r) { 3272 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3273 a[r] = diagA[r]; 3274 idx[r] = cstart + diagIdx[r]; 3275 } else { 3276 a[r] = offdiagA[r]; 3277 idx[r] = cmap[offdiagIdx[r]]; 3278 } 3279 } 3280 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3281 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3282 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3283 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3284 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3285 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3286 PetscFunctionReturn(0); 3287 } 3288 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3291 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3292 { 3293 PetscErrorCode ierr; 3294 Mat *dummy; 3295 3296 PetscFunctionBegin; 3297 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3298 *newmat = *dummy; 3299 ierr = PetscFree(dummy);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } 3302 3303 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3304 3305 #undef __FUNCT__ 3306 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3307 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3308 { 3309 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3310 PetscErrorCode ierr; 3311 3312 PetscFunctionBegin; 3313 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3319 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3320 { 3321 PetscErrorCode ierr; 3322 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3323 3324 PetscFunctionBegin; 3325 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3326 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3327 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 PetscFunctionReturn(0); 3330 } 3331 3332 /* -------------------------------------------------------------------*/ 3333 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3334 MatGetRow_MPIAIJ, 3335 MatRestoreRow_MPIAIJ, 3336 MatMult_MPIAIJ, 3337 /* 4*/ MatMultAdd_MPIAIJ, 3338 MatMultTranspose_MPIAIJ, 3339 MatMultTransposeAdd_MPIAIJ, 3340 #if defined(PETSC_HAVE_PBGL) 3341 MatSolve_MPIAIJ, 3342 #else 3343 0, 3344 #endif 3345 0, 3346 0, 3347 /*10*/ 0, 3348 0, 3349 0, 3350 MatSOR_MPIAIJ, 3351 MatTranspose_MPIAIJ, 3352 /*15*/ MatGetInfo_MPIAIJ, 3353 MatEqual_MPIAIJ, 3354 MatGetDiagonal_MPIAIJ, 3355 MatDiagonalScale_MPIAIJ, 3356 MatNorm_MPIAIJ, 3357 /*20*/ MatAssemblyBegin_MPIAIJ, 3358 MatAssemblyEnd_MPIAIJ, 3359 MatSetOption_MPIAIJ, 3360 MatZeroEntries_MPIAIJ, 3361 /*24*/ MatZeroRows_MPIAIJ, 3362 0, 3363 #if defined(PETSC_HAVE_PBGL) 3364 0, 3365 #else 3366 0, 3367 #endif 3368 0, 3369 0, 3370 /*29*/ MatSetUp_MPIAIJ, 3371 #if defined(PETSC_HAVE_PBGL) 3372 0, 3373 #else 3374 0, 3375 #endif 3376 0, 3377 0, 3378 0, 3379 /*34*/ MatDuplicate_MPIAIJ, 3380 0, 3381 0, 3382 0, 3383 0, 3384 /*39*/ MatAXPY_MPIAIJ, 3385 MatGetSubMatrices_MPIAIJ, 3386 MatIncreaseOverlap_MPIAIJ, 3387 MatGetValues_MPIAIJ, 3388 MatCopy_MPIAIJ, 3389 /*44*/ MatGetRowMax_MPIAIJ, 3390 MatScale_MPIAIJ, 3391 0, 3392 0, 3393 MatZeroRowsColumns_MPIAIJ, 3394 /*49*/ MatSetRandom_MPIAIJ, 3395 0, 3396 0, 3397 0, 3398 0, 3399 /*54*/ MatFDColoringCreate_MPIAIJ, 3400 0, 3401 MatSetUnfactored_MPIAIJ, 3402 MatPermute_MPIAIJ, 3403 0, 3404 /*59*/ MatGetSubMatrix_MPIAIJ, 3405 MatDestroy_MPIAIJ, 3406 MatView_MPIAIJ, 3407 0, 3408 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3409 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3410 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3411 0, 3412 0, 3413 0, 3414 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3415 MatGetRowMinAbs_MPIAIJ, 3416 0, 3417 MatSetColoring_MPIAIJ, 3418 0, 3419 MatSetValuesAdifor_MPIAIJ, 3420 /*75*/ MatFDColoringApply_AIJ, 3421 0, 3422 0, 3423 0, 3424 MatFindZeroDiagonals_MPIAIJ, 3425 /*80*/ 0, 3426 0, 3427 0, 3428 /*83*/ MatLoad_MPIAIJ, 3429 0, 3430 0, 3431 0, 3432 0, 3433 0, 3434 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3435 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3436 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3437 MatPtAP_MPIAIJ_MPIAIJ, 3438 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3439 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3440 0, 3441 0, 3442 0, 3443 0, 3444 /*99*/ 0, 3445 0, 3446 0, 3447 MatConjugate_MPIAIJ, 3448 0, 3449 /*104*/MatSetValuesRow_MPIAIJ, 3450 MatRealPart_MPIAIJ, 3451 MatImaginaryPart_MPIAIJ, 3452 0, 3453 0, 3454 /*109*/0, 3455 MatGetRedundantMatrix_MPIAIJ, 3456 MatGetRowMin_MPIAIJ, 3457 0, 3458 0, 3459 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3460 0, 3461 0, 3462 0, 3463 0, 3464 /*119*/0, 3465 0, 3466 0, 3467 0, 3468 MatGetMultiProcBlock_MPIAIJ, 3469 /*124*/MatFindNonzeroRows_MPIAIJ, 3470 MatGetColumnNorms_MPIAIJ, 3471 MatInvertBlockDiagonal_MPIAIJ, 3472 0, 3473 MatGetSubMatricesParallel_MPIAIJ, 3474 /*129*/0, 3475 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3476 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3477 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3478 0, 3479 /*134*/0, 3480 0, 3481 0, 3482 0, 3483 0, 3484 /*139*/0, 3485 0 3486 }; 3487 3488 /* ----------------------------------------------------------------------------------------*/ 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3492 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3493 { 3494 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3495 PetscErrorCode ierr; 3496 3497 PetscFunctionBegin; 3498 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3499 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3500 PetscFunctionReturn(0); 3501 } 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3505 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3506 { 3507 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3508 PetscErrorCode ierr; 3509 3510 PetscFunctionBegin; 3511 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3512 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3513 PetscFunctionReturn(0); 3514 } 3515 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3518 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3519 { 3520 Mat_MPIAIJ *b; 3521 PetscErrorCode ierr; 3522 3523 PetscFunctionBegin; 3524 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3525 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3526 b = (Mat_MPIAIJ*)B->data; 3527 3528 if (!B->preallocated) { 3529 /* Explicitly create 2 MATSEQAIJ matrices. */ 3530 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3531 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3532 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3533 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3534 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3535 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3536 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3537 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3538 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3539 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3540 } 3541 3542 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3543 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3544 B->preallocated = PETSC_TRUE; 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3550 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3551 { 3552 Mat mat; 3553 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3554 PetscErrorCode ierr; 3555 3556 PetscFunctionBegin; 3557 *newmat = 0; 3558 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3560 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3561 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3562 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3563 a = (Mat_MPIAIJ*)mat->data; 3564 3565 mat->factortype = matin->factortype; 3566 mat->rmap->bs = matin->rmap->bs; 3567 mat->cmap->bs = matin->cmap->bs; 3568 mat->assembled = PETSC_TRUE; 3569 mat->insertmode = NOT_SET_VALUES; 3570 mat->preallocated = PETSC_TRUE; 3571 3572 a->size = oldmat->size; 3573 a->rank = oldmat->rank; 3574 a->donotstash = oldmat->donotstash; 3575 a->roworiented = oldmat->roworiented; 3576 a->rowindices = 0; 3577 a->rowvalues = 0; 3578 a->getrowactive = PETSC_FALSE; 3579 3580 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3581 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3582 3583 if (oldmat->colmap) { 3584 #if defined(PETSC_USE_CTABLE) 3585 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3586 #else 3587 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3588 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3589 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3590 #endif 3591 } else a->colmap = 0; 3592 if (oldmat->garray) { 3593 PetscInt len; 3594 len = oldmat->B->cmap->n; 3595 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3596 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3597 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3598 } else a->garray = 0; 3599 3600 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3602 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3603 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3604 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3605 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3606 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3607 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3608 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3609 *newmat = mat; 3610 PetscFunctionReturn(0); 3611 } 3612 3613 3614 3615 #undef __FUNCT__ 3616 #define __FUNCT__ "MatLoad_MPIAIJ" 3617 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3618 { 3619 PetscScalar *vals,*svals; 3620 MPI_Comm comm; 3621 PetscErrorCode ierr; 3622 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3623 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3624 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3625 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3626 PetscInt cend,cstart,n,*rowners,sizesset=1; 3627 int fd; 3628 PetscInt bs = 1; 3629 3630 PetscFunctionBegin; 3631 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3632 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3633 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3634 if (!rank) { 3635 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3636 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3637 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3638 } 3639 3640 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3641 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3642 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3643 3644 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3645 3646 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3647 M = header[1]; N = header[2]; 3648 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3649 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3650 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3651 3652 /* If global sizes are set, check if they are consistent with that given in the file */ 3653 if (sizesset) { 3654 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3655 } 3656 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3657 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3658 3659 /* determine ownership of all (block) rows */ 3660 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3661 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3662 else m = newMat->rmap->n; /* Set by user */ 3663 3664 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3665 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3666 3667 /* First process needs enough room for process with most rows */ 3668 if (!rank) { 3669 mmax = rowners[1]; 3670 for (i=2; i<=size; i++) { 3671 mmax = PetscMax(mmax, rowners[i]); 3672 } 3673 } else mmax = -1; /* unused, but compilers complain */ 3674 3675 rowners[0] = 0; 3676 for (i=2; i<=size; i++) { 3677 rowners[i] += rowners[i-1]; 3678 } 3679 rstart = rowners[rank]; 3680 rend = rowners[rank+1]; 3681 3682 /* distribute row lengths to all processors */ 3683 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3684 if (!rank) { 3685 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3686 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3687 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3688 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3689 for (j=0; j<m; j++) { 3690 procsnz[0] += ourlens[j]; 3691 } 3692 for (i=1; i<size; i++) { 3693 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3694 /* calculate the number of nonzeros on each processor */ 3695 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3696 procsnz[i] += rowlengths[j]; 3697 } 3698 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3699 } 3700 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3701 } else { 3702 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3703 } 3704 3705 if (!rank) { 3706 /* determine max buffer needed and allocate it */ 3707 maxnz = 0; 3708 for (i=0; i<size; i++) { 3709 maxnz = PetscMax(maxnz,procsnz[i]); 3710 } 3711 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3712 3713 /* read in my part of the matrix column indices */ 3714 nz = procsnz[0]; 3715 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3716 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3717 3718 /* read in every one elses and ship off */ 3719 for (i=1; i<size; i++) { 3720 nz = procsnz[i]; 3721 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3722 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3723 } 3724 ierr = PetscFree(cols);CHKERRQ(ierr); 3725 } else { 3726 /* determine buffer space needed for message */ 3727 nz = 0; 3728 for (i=0; i<m; i++) { 3729 nz += ourlens[i]; 3730 } 3731 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3732 3733 /* receive message of column indices*/ 3734 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3735 } 3736 3737 /* determine column ownership if matrix is not square */ 3738 if (N != M) { 3739 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3740 else n = newMat->cmap->n; 3741 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3742 cstart = cend - n; 3743 } else { 3744 cstart = rstart; 3745 cend = rend; 3746 n = cend - cstart; 3747 } 3748 3749 /* loop over local rows, determining number of off diagonal entries */ 3750 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3751 jj = 0; 3752 for (i=0; i<m; i++) { 3753 for (j=0; j<ourlens[i]; j++) { 3754 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3755 jj++; 3756 } 3757 } 3758 3759 for (i=0; i<m; i++) { 3760 ourlens[i] -= offlens[i]; 3761 } 3762 if (!sizesset) { 3763 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3764 } 3765 3766 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3767 3768 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3769 3770 for (i=0; i<m; i++) { 3771 ourlens[i] += offlens[i]; 3772 } 3773 3774 if (!rank) { 3775 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3776 3777 /* read in my part of the matrix numerical values */ 3778 nz = procsnz[0]; 3779 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3780 3781 /* insert into matrix */ 3782 jj = rstart; 3783 smycols = mycols; 3784 svals = vals; 3785 for (i=0; i<m; i++) { 3786 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3787 smycols += ourlens[i]; 3788 svals += ourlens[i]; 3789 jj++; 3790 } 3791 3792 /* read in other processors and ship out */ 3793 for (i=1; i<size; i++) { 3794 nz = procsnz[i]; 3795 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3796 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3797 } 3798 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3799 } else { 3800 /* receive numeric values */ 3801 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3802 3803 /* receive message of values*/ 3804 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3805 3806 /* insert into matrix */ 3807 jj = rstart; 3808 smycols = mycols; 3809 svals = vals; 3810 for (i=0; i<m; i++) { 3811 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3812 smycols += ourlens[i]; 3813 svals += ourlens[i]; 3814 jj++; 3815 } 3816 } 3817 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3818 ierr = PetscFree(vals);CHKERRQ(ierr); 3819 ierr = PetscFree(mycols);CHKERRQ(ierr); 3820 ierr = PetscFree(rowners);CHKERRQ(ierr); 3821 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 PetscFunctionReturn(0); 3824 } 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3828 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3829 { 3830 PetscErrorCode ierr; 3831 IS iscol_local; 3832 PetscInt csize; 3833 3834 PetscFunctionBegin; 3835 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3836 if (call == MAT_REUSE_MATRIX) { 3837 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3838 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3839 } else { 3840 PetscInt cbs; 3841 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3842 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3843 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3844 } 3845 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3846 if (call == MAT_INITIAL_MATRIX) { 3847 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3848 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3849 } 3850 PetscFunctionReturn(0); 3851 } 3852 3853 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3854 #undef __FUNCT__ 3855 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3856 /* 3857 Not great since it makes two copies of the submatrix, first an SeqAIJ 3858 in local and then by concatenating the local matrices the end result. 3859 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3860 3861 Note: This requires a sequential iscol with all indices. 3862 */ 3863 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3864 { 3865 PetscErrorCode ierr; 3866 PetscMPIInt rank,size; 3867 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3868 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3869 PetscBool allcolumns, colflag; 3870 Mat M,Mreuse; 3871 MatScalar *vwork,*aa; 3872 MPI_Comm comm; 3873 Mat_SeqAIJ *aij; 3874 3875 PetscFunctionBegin; 3876 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3879 3880 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3881 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3882 if (colflag && ncol == mat->cmap->N) { 3883 allcolumns = PETSC_TRUE; 3884 } else { 3885 allcolumns = PETSC_FALSE; 3886 } 3887 if (call == MAT_REUSE_MATRIX) { 3888 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3889 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3890 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3891 } else { 3892 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3893 } 3894 3895 /* 3896 m - number of local rows 3897 n - number of columns (same on all processors) 3898 rstart - first row in new global matrix generated 3899 */ 3900 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3901 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3902 if (call == MAT_INITIAL_MATRIX) { 3903 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3904 ii = aij->i; 3905 jj = aij->j; 3906 3907 /* 3908 Determine the number of non-zeros in the diagonal and off-diagonal 3909 portions of the matrix in order to do correct preallocation 3910 */ 3911 3912 /* first get start and end of "diagonal" columns */ 3913 if (csize == PETSC_DECIDE) { 3914 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3915 if (mglobal == n) { /* square matrix */ 3916 nlocal = m; 3917 } else { 3918 nlocal = n/size + ((n % size) > rank); 3919 } 3920 } else { 3921 nlocal = csize; 3922 } 3923 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3924 rstart = rend - nlocal; 3925 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3926 3927 /* next, compute all the lengths */ 3928 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3929 olens = dlens + m; 3930 for (i=0; i<m; i++) { 3931 jend = ii[i+1] - ii[i]; 3932 olen = 0; 3933 dlen = 0; 3934 for (j=0; j<jend; j++) { 3935 if (*jj < rstart || *jj >= rend) olen++; 3936 else dlen++; 3937 jj++; 3938 } 3939 olens[i] = olen; 3940 dlens[i] = dlen; 3941 } 3942 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3943 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3944 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3945 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3946 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3947 ierr = PetscFree(dlens);CHKERRQ(ierr); 3948 } else { 3949 PetscInt ml,nl; 3950 3951 M = *newmat; 3952 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3953 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3954 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3955 /* 3956 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3957 rather than the slower MatSetValues(). 3958 */ 3959 M->was_assembled = PETSC_TRUE; 3960 M->assembled = PETSC_FALSE; 3961 } 3962 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3963 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3964 ii = aij->i; 3965 jj = aij->j; 3966 aa = aij->a; 3967 for (i=0; i<m; i++) { 3968 row = rstart + i; 3969 nz = ii[i+1] - ii[i]; 3970 cwork = jj; jj += nz; 3971 vwork = aa; aa += nz; 3972 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3973 } 3974 3975 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3977 *newmat = M; 3978 3979 /* save submatrix used in processor for next request */ 3980 if (call == MAT_INITIAL_MATRIX) { 3981 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3982 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3983 } 3984 PetscFunctionReturn(0); 3985 } 3986 3987 #undef __FUNCT__ 3988 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3989 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3990 { 3991 PetscInt m,cstart, cend,j,nnz,i,d; 3992 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3993 const PetscInt *JJ; 3994 PetscScalar *values; 3995 PetscErrorCode ierr; 3996 3997 PetscFunctionBegin; 3998 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3999 4000 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 4001 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 4002 m = B->rmap->n; 4003 cstart = B->cmap->rstart; 4004 cend = B->cmap->rend; 4005 rstart = B->rmap->rstart; 4006 4007 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 4008 4009 #if defined(PETSC_USE_DEBUGGING) 4010 for (i=0; i<m; i++) { 4011 nnz = Ii[i+1]- Ii[i]; 4012 JJ = J + Ii[i]; 4013 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4014 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 4015 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4016 } 4017 #endif 4018 4019 for (i=0; i<m; i++) { 4020 nnz = Ii[i+1]- Ii[i]; 4021 JJ = J + Ii[i]; 4022 nnz_max = PetscMax(nnz_max,nnz); 4023 d = 0; 4024 for (j=0; j<nnz; j++) { 4025 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4026 } 4027 d_nnz[i] = d; 4028 o_nnz[i] = nnz - d; 4029 } 4030 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4031 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4032 4033 if (v) values = (PetscScalar*)v; 4034 else { 4035 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 4036 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 4037 } 4038 4039 for (i=0; i<m; i++) { 4040 ii = i + rstart; 4041 nnz = Ii[i+1]- Ii[i]; 4042 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 4043 } 4044 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4045 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4046 4047 if (!v) { 4048 ierr = PetscFree(values);CHKERRQ(ierr); 4049 } 4050 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4051 PetscFunctionReturn(0); 4052 } 4053 4054 #undef __FUNCT__ 4055 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 4056 /*@ 4057 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4058 (the default parallel PETSc format). 4059 4060 Collective on MPI_Comm 4061 4062 Input Parameters: 4063 + B - the matrix 4064 . i - the indices into j for the start of each local row (starts with zero) 4065 . j - the column indices for each local row (starts with zero) 4066 - v - optional values in the matrix 4067 4068 Level: developer 4069 4070 Notes: 4071 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4072 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4073 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4074 4075 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4076 4077 The format which is used for the sparse matrix input, is equivalent to a 4078 row-major ordering.. i.e for the following matrix, the input data expected is 4079 as shown: 4080 4081 1 0 0 4082 2 0 3 P0 4083 ------- 4084 4 5 6 P1 4085 4086 Process0 [P0]: rows_owned=[0,1] 4087 i = {0,1,3} [size = nrow+1 = 2+1] 4088 j = {0,0,2} [size = nz = 6] 4089 v = {1,2,3} [size = nz = 6] 4090 4091 Process1 [P1]: rows_owned=[2] 4092 i = {0,3} [size = nrow+1 = 1+1] 4093 j = {0,1,2} [size = nz = 6] 4094 v = {4,5,6} [size = nz = 6] 4095 4096 .keywords: matrix, aij, compressed row, sparse, parallel 4097 4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4099 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4100 @*/ 4101 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4102 { 4103 PetscErrorCode ierr; 4104 4105 PetscFunctionBegin; 4106 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4107 PetscFunctionReturn(0); 4108 } 4109 4110 #undef __FUNCT__ 4111 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4112 /*@C 4113 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4114 (the default parallel PETSc format). For good matrix assembly performance 4115 the user should preallocate the matrix storage by setting the parameters 4116 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4117 performance can be increased by more than a factor of 50. 4118 4119 Collective on MPI_Comm 4120 4121 Input Parameters: 4122 + A - the matrix 4123 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4124 (same value is used for all local rows) 4125 . d_nnz - array containing the number of nonzeros in the various rows of the 4126 DIAGONAL portion of the local submatrix (possibly different for each row) 4127 or NULL, if d_nz is used to specify the nonzero structure. 4128 The size of this array is equal to the number of local rows, i.e 'm'. 4129 For matrices that will be factored, you must leave room for (and set) 4130 the diagonal entry even if it is zero. 4131 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4132 submatrix (same value is used for all local rows). 4133 - o_nnz - array containing the number of nonzeros in the various rows of the 4134 OFF-DIAGONAL portion of the local submatrix (possibly different for 4135 each row) or NULL, if o_nz is used to specify the nonzero 4136 structure. The size of this array is equal to the number 4137 of local rows, i.e 'm'. 4138 4139 If the *_nnz parameter is given then the *_nz parameter is ignored 4140 4141 The AIJ format (also called the Yale sparse matrix format or 4142 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4143 storage. The stored row and column indices begin with zero. 4144 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4145 4146 The parallel matrix is partitioned such that the first m0 rows belong to 4147 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4148 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4149 4150 The DIAGONAL portion of the local submatrix of a processor can be defined 4151 as the submatrix which is obtained by extraction the part corresponding to 4152 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4153 first row that belongs to the processor, r2 is the last row belonging to 4154 the this processor, and c1-c2 is range of indices of the local part of a 4155 vector suitable for applying the matrix to. This is an mxn matrix. In the 4156 common case of a square matrix, the row and column ranges are the same and 4157 the DIAGONAL part is also square. The remaining portion of the local 4158 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4159 4160 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4161 4162 You can call MatGetInfo() to get information on how effective the preallocation was; 4163 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4164 You can also run with the option -info and look for messages with the string 4165 malloc in them to see if additional memory allocation was needed. 4166 4167 Example usage: 4168 4169 Consider the following 8x8 matrix with 34 non-zero values, that is 4170 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4171 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4172 as follows: 4173 4174 .vb 4175 1 2 0 | 0 3 0 | 0 4 4176 Proc0 0 5 6 | 7 0 0 | 8 0 4177 9 0 10 | 11 0 0 | 12 0 4178 ------------------------------------- 4179 13 0 14 | 15 16 17 | 0 0 4180 Proc1 0 18 0 | 19 20 21 | 0 0 4181 0 0 0 | 22 23 0 | 24 0 4182 ------------------------------------- 4183 Proc2 25 26 27 | 0 0 28 | 29 0 4184 30 0 0 | 31 32 33 | 0 34 4185 .ve 4186 4187 This can be represented as a collection of submatrices as: 4188 4189 .vb 4190 A B C 4191 D E F 4192 G H I 4193 .ve 4194 4195 Where the submatrices A,B,C are owned by proc0, D,E,F are 4196 owned by proc1, G,H,I are owned by proc2. 4197 4198 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4199 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4200 The 'M','N' parameters are 8,8, and have the same values on all procs. 4201 4202 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4203 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4204 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4205 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4206 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4207 matrix, ans [DF] as another SeqAIJ matrix. 4208 4209 When d_nz, o_nz parameters are specified, d_nz storage elements are 4210 allocated for every row of the local diagonal submatrix, and o_nz 4211 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4212 One way to choose d_nz and o_nz is to use the max nonzerors per local 4213 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4214 In this case, the values of d_nz,o_nz are: 4215 .vb 4216 proc0 : dnz = 2, o_nz = 2 4217 proc1 : dnz = 3, o_nz = 2 4218 proc2 : dnz = 1, o_nz = 4 4219 .ve 4220 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4221 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4222 for proc3. i.e we are using 12+15+10=37 storage locations to store 4223 34 values. 4224 4225 When d_nnz, o_nnz parameters are specified, the storage is specified 4226 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4227 In the above case the values for d_nnz,o_nnz are: 4228 .vb 4229 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4230 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4231 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4232 .ve 4233 Here the space allocated is sum of all the above values i.e 34, and 4234 hence pre-allocation is perfect. 4235 4236 Level: intermediate 4237 4238 .keywords: matrix, aij, compressed row, sparse, parallel 4239 4240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4241 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4242 @*/ 4243 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4244 { 4245 PetscErrorCode ierr; 4246 4247 PetscFunctionBegin; 4248 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4249 PetscValidType(B,1); 4250 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4251 PetscFunctionReturn(0); 4252 } 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4256 /*@ 4257 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4258 CSR format the local rows. 4259 4260 Collective on MPI_Comm 4261 4262 Input Parameters: 4263 + comm - MPI communicator 4264 . m - number of local rows (Cannot be PETSC_DECIDE) 4265 . n - This value should be the same as the local size used in creating the 4266 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4267 calculated if N is given) For square matrices n is almost always m. 4268 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4269 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4270 . i - row indices 4271 . j - column indices 4272 - a - matrix values 4273 4274 Output Parameter: 4275 . mat - the matrix 4276 4277 Level: intermediate 4278 4279 Notes: 4280 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4281 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4282 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4283 4284 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4285 4286 The format which is used for the sparse matrix input, is equivalent to a 4287 row-major ordering.. i.e for the following matrix, the input data expected is 4288 as shown: 4289 4290 1 0 0 4291 2 0 3 P0 4292 ------- 4293 4 5 6 P1 4294 4295 Process0 [P0]: rows_owned=[0,1] 4296 i = {0,1,3} [size = nrow+1 = 2+1] 4297 j = {0,0,2} [size = nz = 6] 4298 v = {1,2,3} [size = nz = 6] 4299 4300 Process1 [P1]: rows_owned=[2] 4301 i = {0,3} [size = nrow+1 = 1+1] 4302 j = {0,1,2} [size = nz = 6] 4303 v = {4,5,6} [size = nz = 6] 4304 4305 .keywords: matrix, aij, compressed row, sparse, parallel 4306 4307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4308 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4309 @*/ 4310 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4311 { 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4316 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4317 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4318 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4319 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4320 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4321 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4322 PetscFunctionReturn(0); 4323 } 4324 4325 #undef __FUNCT__ 4326 #define __FUNCT__ "MatCreateAIJ" 4327 /*@C 4328 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4329 (the default parallel PETSc format). For good matrix assembly performance 4330 the user should preallocate the matrix storage by setting the parameters 4331 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4332 performance can be increased by more than a factor of 50. 4333 4334 Collective on MPI_Comm 4335 4336 Input Parameters: 4337 + comm - MPI communicator 4338 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4339 This value should be the same as the local size used in creating the 4340 y vector for the matrix-vector product y = Ax. 4341 . n - This value should be the same as the local size used in creating the 4342 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4343 calculated if N is given) For square matrices n is almost always m. 4344 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4345 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4346 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4347 (same value is used for all local rows) 4348 . d_nnz - array containing the number of nonzeros in the various rows of the 4349 DIAGONAL portion of the local submatrix (possibly different for each row) 4350 or NULL, if d_nz is used to specify the nonzero structure. 4351 The size of this array is equal to the number of local rows, i.e 'm'. 4352 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4353 submatrix (same value is used for all local rows). 4354 - o_nnz - array containing the number of nonzeros in the various rows of the 4355 OFF-DIAGONAL portion of the local submatrix (possibly different for 4356 each row) or NULL, if o_nz is used to specify the nonzero 4357 structure. The size of this array is equal to the number 4358 of local rows, i.e 'm'. 4359 4360 Output Parameter: 4361 . A - the matrix 4362 4363 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4364 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4365 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4366 4367 Notes: 4368 If the *_nnz parameter is given then the *_nz parameter is ignored 4369 4370 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4371 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4372 storage requirements for this matrix. 4373 4374 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4375 processor than it must be used on all processors that share the object for 4376 that argument. 4377 4378 The user MUST specify either the local or global matrix dimensions 4379 (possibly both). 4380 4381 The parallel matrix is partitioned across processors such that the 4382 first m0 rows belong to process 0, the next m1 rows belong to 4383 process 1, the next m2 rows belong to process 2 etc.. where 4384 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4385 values corresponding to [m x N] submatrix. 4386 4387 The columns are logically partitioned with the n0 columns belonging 4388 to 0th partition, the next n1 columns belonging to the next 4389 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4390 4391 The DIAGONAL portion of the local submatrix on any given processor 4392 is the submatrix corresponding to the rows and columns m,n 4393 corresponding to the given processor. i.e diagonal matrix on 4394 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4395 etc. The remaining portion of the local submatrix [m x (N-n)] 4396 constitute the OFF-DIAGONAL portion. The example below better 4397 illustrates this concept. 4398 4399 For a square global matrix we define each processor's diagonal portion 4400 to be its local rows and the corresponding columns (a square submatrix); 4401 each processor's off-diagonal portion encompasses the remainder of the 4402 local matrix (a rectangular submatrix). 4403 4404 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4405 4406 When calling this routine with a single process communicator, a matrix of 4407 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4408 type of communicator, use the construction mechanism: 4409 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4410 4411 By default, this format uses inodes (identical nodes) when possible. 4412 We search for consecutive rows with the same nonzero structure, thereby 4413 reusing matrix information to achieve increased efficiency. 4414 4415 Options Database Keys: 4416 + -mat_no_inode - Do not use inodes 4417 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4418 - -mat_aij_oneindex - Internally use indexing starting at 1 4419 rather than 0. Note that when calling MatSetValues(), 4420 the user still MUST index entries starting at 0! 4421 4422 4423 Example usage: 4424 4425 Consider the following 8x8 matrix with 34 non-zero values, that is 4426 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4427 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4428 as follows: 4429 4430 .vb 4431 1 2 0 | 0 3 0 | 0 4 4432 Proc0 0 5 6 | 7 0 0 | 8 0 4433 9 0 10 | 11 0 0 | 12 0 4434 ------------------------------------- 4435 13 0 14 | 15 16 17 | 0 0 4436 Proc1 0 18 0 | 19 20 21 | 0 0 4437 0 0 0 | 22 23 0 | 24 0 4438 ------------------------------------- 4439 Proc2 25 26 27 | 0 0 28 | 29 0 4440 30 0 0 | 31 32 33 | 0 34 4441 .ve 4442 4443 This can be represented as a collection of submatrices as: 4444 4445 .vb 4446 A B C 4447 D E F 4448 G H I 4449 .ve 4450 4451 Where the submatrices A,B,C are owned by proc0, D,E,F are 4452 owned by proc1, G,H,I are owned by proc2. 4453 4454 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4455 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4456 The 'M','N' parameters are 8,8, and have the same values on all procs. 4457 4458 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4459 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4460 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4461 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4462 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4463 matrix, ans [DF] as another SeqAIJ matrix. 4464 4465 When d_nz, o_nz parameters are specified, d_nz storage elements are 4466 allocated for every row of the local diagonal submatrix, and o_nz 4467 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4468 One way to choose d_nz and o_nz is to use the max nonzerors per local 4469 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4470 In this case, the values of d_nz,o_nz are: 4471 .vb 4472 proc0 : dnz = 2, o_nz = 2 4473 proc1 : dnz = 3, o_nz = 2 4474 proc2 : dnz = 1, o_nz = 4 4475 .ve 4476 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4477 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4478 for proc3. i.e we are using 12+15+10=37 storage locations to store 4479 34 values. 4480 4481 When d_nnz, o_nnz parameters are specified, the storage is specified 4482 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4483 In the above case the values for d_nnz,o_nnz are: 4484 .vb 4485 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4486 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4487 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4488 .ve 4489 Here the space allocated is sum of all the above values i.e 34, and 4490 hence pre-allocation is perfect. 4491 4492 Level: intermediate 4493 4494 .keywords: matrix, aij, compressed row, sparse, parallel 4495 4496 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4497 MPIAIJ, MatCreateMPIAIJWithArrays() 4498 @*/ 4499 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4500 { 4501 PetscErrorCode ierr; 4502 PetscMPIInt size; 4503 4504 PetscFunctionBegin; 4505 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4506 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4507 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4508 if (size > 1) { 4509 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4510 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4511 } else { 4512 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4513 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4514 } 4515 PetscFunctionReturn(0); 4516 } 4517 4518 #undef __FUNCT__ 4519 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4520 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4521 { 4522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4523 4524 PetscFunctionBegin; 4525 *Ad = a->A; 4526 *Ao = a->B; 4527 *colmap = a->garray; 4528 PetscFunctionReturn(0); 4529 } 4530 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4533 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4534 { 4535 PetscErrorCode ierr; 4536 PetscInt i; 4537 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4538 4539 PetscFunctionBegin; 4540 if (coloring->ctype == IS_COLORING_GLOBAL) { 4541 ISColoringValue *allcolors,*colors; 4542 ISColoring ocoloring; 4543 4544 /* set coloring for diagonal portion */ 4545 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4546 4547 /* set coloring for off-diagonal portion */ 4548 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4549 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4550 for (i=0; i<a->B->cmap->n; i++) { 4551 colors[i] = allcolors[a->garray[i]]; 4552 } 4553 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4554 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4555 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4556 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4557 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4558 ISColoringValue *colors; 4559 PetscInt *larray; 4560 ISColoring ocoloring; 4561 4562 /* set coloring for diagonal portion */ 4563 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4564 for (i=0; i<a->A->cmap->n; i++) { 4565 larray[i] = i + A->cmap->rstart; 4566 } 4567 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4568 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4569 for (i=0; i<a->A->cmap->n; i++) { 4570 colors[i] = coloring->colors[larray[i]]; 4571 } 4572 ierr = PetscFree(larray);CHKERRQ(ierr); 4573 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4574 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4575 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4576 4577 /* set coloring for off-diagonal portion */ 4578 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4579 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4580 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4581 for (i=0; i<a->B->cmap->n; i++) { 4582 colors[i] = coloring->colors[larray[i]]; 4583 } 4584 ierr = PetscFree(larray);CHKERRQ(ierr); 4585 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4586 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4587 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4588 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4589 PetscFunctionReturn(0); 4590 } 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4594 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4595 { 4596 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4597 PetscErrorCode ierr; 4598 4599 PetscFunctionBegin; 4600 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4601 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4602 PetscFunctionReturn(0); 4603 } 4604 4605 #undef __FUNCT__ 4606 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4607 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4608 { 4609 PetscErrorCode ierr; 4610 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4611 PetscInt *indx; 4612 4613 PetscFunctionBegin; 4614 /* This routine will ONLY return MPIAIJ type matrix */ 4615 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4616 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4617 if (n == PETSC_DECIDE) { 4618 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4619 } 4620 /* Check sum(n) = N */ 4621 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4622 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4623 4624 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4625 rstart -= m; 4626 4627 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4628 for (i=0; i<m; i++) { 4629 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4630 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4631 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4632 } 4633 4634 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4635 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4636 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4637 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4638 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4639 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4640 PetscFunctionReturn(0); 4641 } 4642 4643 #undef __FUNCT__ 4644 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4645 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4646 { 4647 PetscErrorCode ierr; 4648 PetscInt m,N,i,rstart,nnz,Ii; 4649 PetscInt *indx; 4650 PetscScalar *values; 4651 4652 PetscFunctionBegin; 4653 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4654 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4655 for (i=0; i<m; i++) { 4656 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4657 Ii = i + rstart; 4658 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4659 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4660 } 4661 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4662 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 #undef __FUNCT__ 4667 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4668 /*@ 4669 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4670 matrices from each processor 4671 4672 Collective on MPI_Comm 4673 4674 Input Parameters: 4675 + comm - the communicators the parallel matrix will live on 4676 . inmat - the input sequential matrices 4677 . n - number of local columns (or PETSC_DECIDE) 4678 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4679 4680 Output Parameter: 4681 . outmat - the parallel matrix generated 4682 4683 Level: advanced 4684 4685 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4686 4687 @*/ 4688 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4689 { 4690 PetscErrorCode ierr; 4691 4692 PetscFunctionBegin; 4693 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4694 if (scall == MAT_INITIAL_MATRIX) { 4695 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4696 } 4697 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4698 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4699 PetscFunctionReturn(0); 4700 } 4701 4702 #undef __FUNCT__ 4703 #define __FUNCT__ "MatFileSplit" 4704 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4705 { 4706 PetscErrorCode ierr; 4707 PetscMPIInt rank; 4708 PetscInt m,N,i,rstart,nnz; 4709 size_t len; 4710 const PetscInt *indx; 4711 PetscViewer out; 4712 char *name; 4713 Mat B; 4714 const PetscScalar *values; 4715 4716 PetscFunctionBegin; 4717 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4718 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4719 /* Should this be the type of the diagonal block of A? */ 4720 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4721 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4722 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4723 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4724 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4725 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4726 for (i=0; i<m; i++) { 4727 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4728 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4729 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4730 } 4731 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4732 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4733 4734 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4735 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4736 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4737 sprintf(name,"%s.%d",outfile,rank); 4738 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4739 ierr = PetscFree(name);CHKERRQ(ierr); 4740 ierr = MatView(B,out);CHKERRQ(ierr); 4741 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4742 ierr = MatDestroy(&B);CHKERRQ(ierr); 4743 PetscFunctionReturn(0); 4744 } 4745 4746 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4747 #undef __FUNCT__ 4748 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4749 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4750 { 4751 PetscErrorCode ierr; 4752 Mat_Merge_SeqsToMPI *merge; 4753 PetscContainer container; 4754 4755 PetscFunctionBegin; 4756 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4757 if (container) { 4758 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4759 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4760 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4761 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4762 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4763 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4764 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4765 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4766 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4767 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4768 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4769 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4770 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4771 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4772 ierr = PetscFree(merge);CHKERRQ(ierr); 4773 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4774 } 4775 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4776 PetscFunctionReturn(0); 4777 } 4778 4779 #include <../src/mat/utils/freespace.h> 4780 #include <petscbt.h> 4781 4782 #undef __FUNCT__ 4783 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4784 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4785 { 4786 PetscErrorCode ierr; 4787 MPI_Comm comm; 4788 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4789 PetscMPIInt size,rank,taga,*len_s; 4790 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4791 PetscInt proc,m; 4792 PetscInt **buf_ri,**buf_rj; 4793 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4794 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4795 MPI_Request *s_waits,*r_waits; 4796 MPI_Status *status; 4797 MatScalar *aa=a->a; 4798 MatScalar **abuf_r,*ba_i; 4799 Mat_Merge_SeqsToMPI *merge; 4800 PetscContainer container; 4801 4802 PetscFunctionBegin; 4803 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4804 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4805 4806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4808 4809 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4810 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4811 4812 bi = merge->bi; 4813 bj = merge->bj; 4814 buf_ri = merge->buf_ri; 4815 buf_rj = merge->buf_rj; 4816 4817 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4818 owners = merge->rowmap->range; 4819 len_s = merge->len_s; 4820 4821 /* send and recv matrix values */ 4822 /*-----------------------------*/ 4823 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4824 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4825 4826 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4827 for (proc=0,k=0; proc<size; proc++) { 4828 if (!len_s[proc]) continue; 4829 i = owners[proc]; 4830 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4831 k++; 4832 } 4833 4834 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4835 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4836 ierr = PetscFree(status);CHKERRQ(ierr); 4837 4838 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4839 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4840 4841 /* insert mat values of mpimat */ 4842 /*----------------------------*/ 4843 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4844 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4845 4846 for (k=0; k<merge->nrecv; k++) { 4847 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4848 nrows = *(buf_ri_k[k]); 4849 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4850 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4851 } 4852 4853 /* set values of ba */ 4854 m = merge->rowmap->n; 4855 for (i=0; i<m; i++) { 4856 arow = owners[rank] + i; 4857 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4858 bnzi = bi[i+1] - bi[i]; 4859 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4860 4861 /* add local non-zero vals of this proc's seqmat into ba */ 4862 anzi = ai[arow+1] - ai[arow]; 4863 aj = a->j + ai[arow]; 4864 aa = a->a + ai[arow]; 4865 nextaj = 0; 4866 for (j=0; nextaj<anzi; j++) { 4867 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4868 ba_i[j] += aa[nextaj++]; 4869 } 4870 } 4871 4872 /* add received vals into ba */ 4873 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4874 /* i-th row */ 4875 if (i == *nextrow[k]) { 4876 anzi = *(nextai[k]+1) - *nextai[k]; 4877 aj = buf_rj[k] + *(nextai[k]); 4878 aa = abuf_r[k] + *(nextai[k]); 4879 nextaj = 0; 4880 for (j=0; nextaj<anzi; j++) { 4881 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4882 ba_i[j] += aa[nextaj++]; 4883 } 4884 } 4885 nextrow[k]++; nextai[k]++; 4886 } 4887 } 4888 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4889 } 4890 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4891 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4892 4893 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4894 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4895 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4896 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4897 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4898 PetscFunctionReturn(0); 4899 } 4900 4901 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4905 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4906 { 4907 PetscErrorCode ierr; 4908 Mat B_mpi; 4909 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4910 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4911 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4912 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4913 PetscInt len,proc,*dnz,*onz,bs,cbs; 4914 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4915 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4916 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4917 MPI_Status *status; 4918 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4919 PetscBT lnkbt; 4920 Mat_Merge_SeqsToMPI *merge; 4921 PetscContainer container; 4922 4923 PetscFunctionBegin; 4924 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4925 4926 /* make sure it is a PETSc comm */ 4927 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4928 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4929 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4930 4931 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4932 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4933 4934 /* determine row ownership */ 4935 /*---------------------------------------------------------*/ 4936 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4937 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4938 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4939 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4940 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4941 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4942 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4943 4944 m = merge->rowmap->n; 4945 owners = merge->rowmap->range; 4946 4947 /* determine the number of messages to send, their lengths */ 4948 /*---------------------------------------------------------*/ 4949 len_s = merge->len_s; 4950 4951 len = 0; /* length of buf_si[] */ 4952 merge->nsend = 0; 4953 for (proc=0; proc<size; proc++) { 4954 len_si[proc] = 0; 4955 if (proc == rank) { 4956 len_s[proc] = 0; 4957 } else { 4958 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4959 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4960 } 4961 if (len_s[proc]) { 4962 merge->nsend++; 4963 nrows = 0; 4964 for (i=owners[proc]; i<owners[proc+1]; i++) { 4965 if (ai[i+1] > ai[i]) nrows++; 4966 } 4967 len_si[proc] = 2*(nrows+1); 4968 len += len_si[proc]; 4969 } 4970 } 4971 4972 /* determine the number and length of messages to receive for ij-structure */ 4973 /*-------------------------------------------------------------------------*/ 4974 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4975 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4976 4977 /* post the Irecv of j-structure */ 4978 /*-------------------------------*/ 4979 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4980 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4981 4982 /* post the Isend of j-structure */ 4983 /*--------------------------------*/ 4984 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4985 4986 for (proc=0, k=0; proc<size; proc++) { 4987 if (!len_s[proc]) continue; 4988 i = owners[proc]; 4989 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4990 k++; 4991 } 4992 4993 /* receives and sends of j-structure are complete */ 4994 /*------------------------------------------------*/ 4995 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4996 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4997 4998 /* send and recv i-structure */ 4999 /*---------------------------*/ 5000 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 5001 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 5002 5003 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 5004 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 5005 for (proc=0,k=0; proc<size; proc++) { 5006 if (!len_s[proc]) continue; 5007 /* form outgoing message for i-structure: 5008 buf_si[0]: nrows to be sent 5009 [1:nrows]: row index (global) 5010 [nrows+1:2*nrows+1]: i-structure index 5011 */ 5012 /*-------------------------------------------*/ 5013 nrows = len_si[proc]/2 - 1; 5014 buf_si_i = buf_si + nrows+1; 5015 buf_si[0] = nrows; 5016 buf_si_i[0] = 0; 5017 nrows = 0; 5018 for (i=owners[proc]; i<owners[proc+1]; i++) { 5019 anzi = ai[i+1] - ai[i]; 5020 if (anzi) { 5021 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 5022 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 5023 nrows++; 5024 } 5025 } 5026 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 5027 k++; 5028 buf_si += len_si[proc]; 5029 } 5030 5031 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 5032 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 5033 5034 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 5035 for (i=0; i<merge->nrecv; i++) { 5036 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 5037 } 5038 5039 ierr = PetscFree(len_si);CHKERRQ(ierr); 5040 ierr = PetscFree(len_ri);CHKERRQ(ierr); 5041 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 5042 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 5043 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 5044 ierr = PetscFree(buf_s);CHKERRQ(ierr); 5045 ierr = PetscFree(status);CHKERRQ(ierr); 5046 5047 /* compute a local seq matrix in each processor */ 5048 /*----------------------------------------------*/ 5049 /* allocate bi array and free space for accumulating nonzero column info */ 5050 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 5051 bi[0] = 0; 5052 5053 /* create and initialize a linked list */ 5054 nlnk = N+1; 5055 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5056 5057 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5058 len = ai[owners[rank+1]] - ai[owners[rank]]; 5059 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 5060 5061 current_space = free_space; 5062 5063 /* determine symbolic info for each local row */ 5064 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5065 5066 for (k=0; k<merge->nrecv; k++) { 5067 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5068 nrows = *buf_ri_k[k]; 5069 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5070 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5071 } 5072 5073 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5074 len = 0; 5075 for (i=0; i<m; i++) { 5076 bnzi = 0; 5077 /* add local non-zero cols of this proc's seqmat into lnk */ 5078 arow = owners[rank] + i; 5079 anzi = ai[arow+1] - ai[arow]; 5080 aj = a->j + ai[arow]; 5081 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5082 bnzi += nlnk; 5083 /* add received col data into lnk */ 5084 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5085 if (i == *nextrow[k]) { /* i-th row */ 5086 anzi = *(nextai[k]+1) - *nextai[k]; 5087 aj = buf_rj[k] + *nextai[k]; 5088 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5089 bnzi += nlnk; 5090 nextrow[k]++; nextai[k]++; 5091 } 5092 } 5093 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5094 5095 /* if free space is not available, make more free space */ 5096 if (current_space->local_remaining<bnzi) { 5097 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5098 nspacedouble++; 5099 } 5100 /* copy data into free space, then initialize lnk */ 5101 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5102 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5103 5104 current_space->array += bnzi; 5105 current_space->local_used += bnzi; 5106 current_space->local_remaining -= bnzi; 5107 5108 bi[i+1] = bi[i] + bnzi; 5109 } 5110 5111 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5112 5113 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5114 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5115 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5116 5117 /* create symbolic parallel matrix B_mpi */ 5118 /*---------------------------------------*/ 5119 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5120 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5121 if (n==PETSC_DECIDE) { 5122 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5123 } else { 5124 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5125 } 5126 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5127 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5128 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5129 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5130 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5131 5132 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5133 B_mpi->assembled = PETSC_FALSE; 5134 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5135 merge->bi = bi; 5136 merge->bj = bj; 5137 merge->buf_ri = buf_ri; 5138 merge->buf_rj = buf_rj; 5139 merge->coi = NULL; 5140 merge->coj = NULL; 5141 merge->owners_co = NULL; 5142 5143 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5144 5145 /* attach the supporting struct to B_mpi for reuse */ 5146 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5147 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5148 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5149 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5150 *mpimat = B_mpi; 5151 5152 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5153 PetscFunctionReturn(0); 5154 } 5155 5156 #undef __FUNCT__ 5157 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5158 /*@C 5159 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5160 matrices from each processor 5161 5162 Collective on MPI_Comm 5163 5164 Input Parameters: 5165 + comm - the communicators the parallel matrix will live on 5166 . seqmat - the input sequential matrices 5167 . m - number of local rows (or PETSC_DECIDE) 5168 . n - number of local columns (or PETSC_DECIDE) 5169 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5170 5171 Output Parameter: 5172 . mpimat - the parallel matrix generated 5173 5174 Level: advanced 5175 5176 Notes: 5177 The dimensions of the sequential matrix in each processor MUST be the same. 5178 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5179 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5180 @*/ 5181 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5182 { 5183 PetscErrorCode ierr; 5184 PetscMPIInt size; 5185 5186 PetscFunctionBegin; 5187 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5188 if (size == 1) { 5189 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5190 if (scall == MAT_INITIAL_MATRIX) { 5191 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5192 } else { 5193 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5194 } 5195 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5196 PetscFunctionReturn(0); 5197 } 5198 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5201 } 5202 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5203 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5204 PetscFunctionReturn(0); 5205 } 5206 5207 #undef __FUNCT__ 5208 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5209 /*@ 5210 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5211 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5212 with MatGetSize() 5213 5214 Not Collective 5215 5216 Input Parameters: 5217 + A - the matrix 5218 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5219 5220 Output Parameter: 5221 . A_loc - the local sequential matrix generated 5222 5223 Level: developer 5224 5225 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5226 5227 @*/ 5228 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5229 { 5230 PetscErrorCode ierr; 5231 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5232 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5233 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5234 MatScalar *aa=a->a,*ba=b->a,*cam; 5235 PetscScalar *ca; 5236 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5237 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5238 PetscBool match; 5239 5240 PetscFunctionBegin; 5241 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5242 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5243 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5244 if (scall == MAT_INITIAL_MATRIX) { 5245 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5246 ci[0] = 0; 5247 for (i=0; i<am; i++) { 5248 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5249 } 5250 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5251 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5252 k = 0; 5253 for (i=0; i<am; i++) { 5254 ncols_o = bi[i+1] - bi[i]; 5255 ncols_d = ai[i+1] - ai[i]; 5256 /* off-diagonal portion of A */ 5257 for (jo=0; jo<ncols_o; jo++) { 5258 col = cmap[*bj]; 5259 if (col >= cstart) break; 5260 cj[k] = col; bj++; 5261 ca[k++] = *ba++; 5262 } 5263 /* diagonal portion of A */ 5264 for (j=0; j<ncols_d; j++) { 5265 cj[k] = cstart + *aj++; 5266 ca[k++] = *aa++; 5267 } 5268 /* off-diagonal portion of A */ 5269 for (j=jo; j<ncols_o; j++) { 5270 cj[k] = cmap[*bj++]; 5271 ca[k++] = *ba++; 5272 } 5273 } 5274 /* put together the new matrix */ 5275 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5276 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5277 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5278 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5279 mat->free_a = PETSC_TRUE; 5280 mat->free_ij = PETSC_TRUE; 5281 mat->nonew = 0; 5282 } else if (scall == MAT_REUSE_MATRIX) { 5283 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5284 ci = mat->i; cj = mat->j; cam = mat->a; 5285 for (i=0; i<am; i++) { 5286 /* off-diagonal portion of A */ 5287 ncols_o = bi[i+1] - bi[i]; 5288 for (jo=0; jo<ncols_o; jo++) { 5289 col = cmap[*bj]; 5290 if (col >= cstart) break; 5291 *cam++ = *ba++; bj++; 5292 } 5293 /* diagonal portion of A */ 5294 ncols_d = ai[i+1] - ai[i]; 5295 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5296 /* off-diagonal portion of A */ 5297 for (j=jo; j<ncols_o; j++) { 5298 *cam++ = *ba++; bj++; 5299 } 5300 } 5301 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5302 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5303 PetscFunctionReturn(0); 5304 } 5305 5306 #undef __FUNCT__ 5307 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5308 /*@C 5309 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5310 5311 Not Collective 5312 5313 Input Parameters: 5314 + A - the matrix 5315 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5316 - row, col - index sets of rows and columns to extract (or NULL) 5317 5318 Output Parameter: 5319 . A_loc - the local sequential matrix generated 5320 5321 Level: developer 5322 5323 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5324 5325 @*/ 5326 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5327 { 5328 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5329 PetscErrorCode ierr; 5330 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5331 IS isrowa,iscola; 5332 Mat *aloc; 5333 PetscBool match; 5334 5335 PetscFunctionBegin; 5336 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5337 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5338 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5339 if (!row) { 5340 start = A->rmap->rstart; end = A->rmap->rend; 5341 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5342 } else { 5343 isrowa = *row; 5344 } 5345 if (!col) { 5346 start = A->cmap->rstart; 5347 cmap = a->garray; 5348 nzA = a->A->cmap->n; 5349 nzB = a->B->cmap->n; 5350 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5351 ncols = 0; 5352 for (i=0; i<nzB; i++) { 5353 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5354 else break; 5355 } 5356 imark = i; 5357 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5358 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5359 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5360 } else { 5361 iscola = *col; 5362 } 5363 if (scall != MAT_INITIAL_MATRIX) { 5364 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5365 aloc[0] = *A_loc; 5366 } 5367 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5368 *A_loc = aloc[0]; 5369 ierr = PetscFree(aloc);CHKERRQ(ierr); 5370 if (!row) { 5371 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5372 } 5373 if (!col) { 5374 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5375 } 5376 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5377 PetscFunctionReturn(0); 5378 } 5379 5380 #undef __FUNCT__ 5381 #define __FUNCT__ "MatGetBrowsOfAcols" 5382 /*@C 5383 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5384 5385 Collective on Mat 5386 5387 Input Parameters: 5388 + A,B - the matrices in mpiaij format 5389 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5390 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5391 5392 Output Parameter: 5393 + rowb, colb - index sets of rows and columns of B to extract 5394 - B_seq - the sequential matrix generated 5395 5396 Level: developer 5397 5398 @*/ 5399 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5400 { 5401 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5402 PetscErrorCode ierr; 5403 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5404 IS isrowb,iscolb; 5405 Mat *bseq=NULL; 5406 5407 PetscFunctionBegin; 5408 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5409 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5410 } 5411 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5412 5413 if (scall == MAT_INITIAL_MATRIX) { 5414 start = A->cmap->rstart; 5415 cmap = a->garray; 5416 nzA = a->A->cmap->n; 5417 nzB = a->B->cmap->n; 5418 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5419 ncols = 0; 5420 for (i=0; i<nzB; i++) { /* row < local row index */ 5421 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5422 else break; 5423 } 5424 imark = i; 5425 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5426 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5427 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5428 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5429 } else { 5430 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5431 isrowb = *rowb; iscolb = *colb; 5432 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5433 bseq[0] = *B_seq; 5434 } 5435 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5436 *B_seq = bseq[0]; 5437 ierr = PetscFree(bseq);CHKERRQ(ierr); 5438 if (!rowb) { 5439 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5440 } else { 5441 *rowb = isrowb; 5442 } 5443 if (!colb) { 5444 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5445 } else { 5446 *colb = iscolb; 5447 } 5448 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5449 PetscFunctionReturn(0); 5450 } 5451 5452 #undef __FUNCT__ 5453 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5454 /* 5455 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5456 of the OFF-DIAGONAL portion of local A 5457 5458 Collective on Mat 5459 5460 Input Parameters: 5461 + A,B - the matrices in mpiaij format 5462 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5463 5464 Output Parameter: 5465 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5466 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5467 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5468 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5469 5470 Level: developer 5471 5472 */ 5473 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5474 { 5475 VecScatter_MPI_General *gen_to,*gen_from; 5476 PetscErrorCode ierr; 5477 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5478 Mat_SeqAIJ *b_oth; 5479 VecScatter ctx =a->Mvctx; 5480 MPI_Comm comm; 5481 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5482 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5483 PetscScalar *rvalues,*svalues; 5484 MatScalar *b_otha,*bufa,*bufA; 5485 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5486 MPI_Request *rwaits = NULL,*swaits = NULL; 5487 MPI_Status *sstatus,rstatus; 5488 PetscMPIInt jj; 5489 PetscInt *cols,sbs,rbs; 5490 PetscScalar *vals; 5491 5492 PetscFunctionBegin; 5493 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5494 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5495 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5496 } 5497 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5498 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5499 5500 gen_to = (VecScatter_MPI_General*)ctx->todata; 5501 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5502 rvalues = gen_from->values; /* holds the length of receiving row */ 5503 svalues = gen_to->values; /* holds the length of sending row */ 5504 nrecvs = gen_from->n; 5505 nsends = gen_to->n; 5506 5507 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5508 srow = gen_to->indices; /* local row index to be sent */ 5509 sstarts = gen_to->starts; 5510 sprocs = gen_to->procs; 5511 sstatus = gen_to->sstatus; 5512 sbs = gen_to->bs; 5513 rstarts = gen_from->starts; 5514 rprocs = gen_from->procs; 5515 rbs = gen_from->bs; 5516 5517 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5518 if (scall == MAT_INITIAL_MATRIX) { 5519 /* i-array */ 5520 /*---------*/ 5521 /* post receives */ 5522 for (i=0; i<nrecvs; i++) { 5523 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5524 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5525 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5526 } 5527 5528 /* pack the outgoing message */ 5529 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5530 5531 sstartsj[0] = 0; 5532 rstartsj[0] = 0; 5533 len = 0; /* total length of j or a array to be sent */ 5534 k = 0; 5535 for (i=0; i<nsends; i++) { 5536 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5537 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5538 for (j=0; j<nrows; j++) { 5539 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5540 for (l=0; l<sbs; l++) { 5541 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5542 5543 rowlen[j*sbs+l] = ncols; 5544 5545 len += ncols; 5546 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5547 } 5548 k++; 5549 } 5550 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5551 5552 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5553 } 5554 /* recvs and sends of i-array are completed */ 5555 i = nrecvs; 5556 while (i--) { 5557 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5558 } 5559 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5560 5561 /* allocate buffers for sending j and a arrays */ 5562 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5563 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5564 5565 /* create i-array of B_oth */ 5566 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5567 5568 b_othi[0] = 0; 5569 len = 0; /* total length of j or a array to be received */ 5570 k = 0; 5571 for (i=0; i<nrecvs; i++) { 5572 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5573 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5574 for (j=0; j<nrows; j++) { 5575 b_othi[k+1] = b_othi[k] + rowlen[j]; 5576 len += rowlen[j]; k++; 5577 } 5578 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5579 } 5580 5581 /* allocate space for j and a arrrays of B_oth */ 5582 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5583 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5584 5585 /* j-array */ 5586 /*---------*/ 5587 /* post receives of j-array */ 5588 for (i=0; i<nrecvs; i++) { 5589 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5590 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5591 } 5592 5593 /* pack the outgoing message j-array */ 5594 k = 0; 5595 for (i=0; i<nsends; i++) { 5596 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5597 bufJ = bufj+sstartsj[i]; 5598 for (j=0; j<nrows; j++) { 5599 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5600 for (ll=0; ll<sbs; ll++) { 5601 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5602 for (l=0; l<ncols; l++) { 5603 *bufJ++ = cols[l]; 5604 } 5605 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5606 } 5607 } 5608 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5609 } 5610 5611 /* recvs and sends of j-array are completed */ 5612 i = nrecvs; 5613 while (i--) { 5614 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5615 } 5616 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5617 } else if (scall == MAT_REUSE_MATRIX) { 5618 sstartsj = *startsj_s; 5619 rstartsj = *startsj_r; 5620 bufa = *bufa_ptr; 5621 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5622 b_otha = b_oth->a; 5623 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5624 5625 /* a-array */ 5626 /*---------*/ 5627 /* post receives of a-array */ 5628 for (i=0; i<nrecvs; i++) { 5629 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5630 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5631 } 5632 5633 /* pack the outgoing message a-array */ 5634 k = 0; 5635 for (i=0; i<nsends; i++) { 5636 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5637 bufA = bufa+sstartsj[i]; 5638 for (j=0; j<nrows; j++) { 5639 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5640 for (ll=0; ll<sbs; ll++) { 5641 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5642 for (l=0; l<ncols; l++) { 5643 *bufA++ = vals[l]; 5644 } 5645 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5646 } 5647 } 5648 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5649 } 5650 /* recvs and sends of a-array are completed */ 5651 i = nrecvs; 5652 while (i--) { 5653 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5654 } 5655 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5656 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5657 5658 if (scall == MAT_INITIAL_MATRIX) { 5659 /* put together the new matrix */ 5660 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5661 5662 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5663 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5664 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5665 b_oth->free_a = PETSC_TRUE; 5666 b_oth->free_ij = PETSC_TRUE; 5667 b_oth->nonew = 0; 5668 5669 ierr = PetscFree(bufj);CHKERRQ(ierr); 5670 if (!startsj_s || !bufa_ptr) { 5671 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5672 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5673 } else { 5674 *startsj_s = sstartsj; 5675 *startsj_r = rstartsj; 5676 *bufa_ptr = bufa; 5677 } 5678 } 5679 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5680 PetscFunctionReturn(0); 5681 } 5682 5683 #undef __FUNCT__ 5684 #define __FUNCT__ "MatGetCommunicationStructs" 5685 /*@C 5686 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5687 5688 Not Collective 5689 5690 Input Parameters: 5691 . A - The matrix in mpiaij format 5692 5693 Output Parameter: 5694 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5695 . colmap - A map from global column index to local index into lvec 5696 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5697 5698 Level: developer 5699 5700 @*/ 5701 #if defined(PETSC_USE_CTABLE) 5702 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5703 #else 5704 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5705 #endif 5706 { 5707 Mat_MPIAIJ *a; 5708 5709 PetscFunctionBegin; 5710 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5711 PetscValidPointer(lvec, 2); 5712 PetscValidPointer(colmap, 3); 5713 PetscValidPointer(multScatter, 4); 5714 a = (Mat_MPIAIJ*) A->data; 5715 if (lvec) *lvec = a->lvec; 5716 if (colmap) *colmap = a->colmap; 5717 if (multScatter) *multScatter = a->Mvctx; 5718 PetscFunctionReturn(0); 5719 } 5720 5721 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5722 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5723 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5724 5725 #undef __FUNCT__ 5726 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5727 /* 5728 Computes (B'*A')' since computing B*A directly is untenable 5729 5730 n p p 5731 ( ) ( ) ( ) 5732 m ( A ) * n ( B ) = m ( C ) 5733 ( ) ( ) ( ) 5734 5735 */ 5736 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5737 { 5738 PetscErrorCode ierr; 5739 Mat At,Bt,Ct; 5740 5741 PetscFunctionBegin; 5742 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5743 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5744 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5745 ierr = MatDestroy(&At);CHKERRQ(ierr); 5746 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5747 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5748 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5749 PetscFunctionReturn(0); 5750 } 5751 5752 #undef __FUNCT__ 5753 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5754 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5755 { 5756 PetscErrorCode ierr; 5757 PetscInt m=A->rmap->n,n=B->cmap->n; 5758 Mat Cmat; 5759 5760 PetscFunctionBegin; 5761 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5762 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5763 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5764 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5765 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5766 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5767 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5768 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5769 5770 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5771 5772 *C = Cmat; 5773 PetscFunctionReturn(0); 5774 } 5775 5776 /* ----------------------------------------------------------------*/ 5777 #undef __FUNCT__ 5778 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5779 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5780 { 5781 PetscErrorCode ierr; 5782 5783 PetscFunctionBegin; 5784 if (scall == MAT_INITIAL_MATRIX) { 5785 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5786 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5787 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5788 } 5789 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5790 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5791 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5792 PetscFunctionReturn(0); 5793 } 5794 5795 #if defined(PETSC_HAVE_MUMPS) 5796 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5797 #endif 5798 #if defined(PETSC_HAVE_PASTIX) 5799 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5800 #endif 5801 #if defined(PETSC_HAVE_SUPERLU_DIST) 5802 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5803 #endif 5804 #if defined(PETSC_HAVE_CLIQUE) 5805 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5806 #endif 5807 5808 /*MC 5809 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5810 5811 Options Database Keys: 5812 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5813 5814 Level: beginner 5815 5816 .seealso: MatCreateAIJ() 5817 M*/ 5818 5819 #undef __FUNCT__ 5820 #define __FUNCT__ "MatCreate_MPIAIJ" 5821 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5822 { 5823 Mat_MPIAIJ *b; 5824 PetscErrorCode ierr; 5825 PetscMPIInt size; 5826 5827 PetscFunctionBegin; 5828 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5829 5830 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5831 B->data = (void*)b; 5832 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5833 B->assembled = PETSC_FALSE; 5834 B->insertmode = NOT_SET_VALUES; 5835 b->size = size; 5836 5837 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5838 5839 /* build cache for off array entries formed */ 5840 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5841 5842 b->donotstash = PETSC_FALSE; 5843 b->colmap = 0; 5844 b->garray = 0; 5845 b->roworiented = PETSC_TRUE; 5846 5847 /* stuff used for matrix vector multiply */ 5848 b->lvec = NULL; 5849 b->Mvctx = NULL; 5850 5851 /* stuff for MatGetRow() */ 5852 b->rowindices = 0; 5853 b->rowvalues = 0; 5854 b->getrowactive = PETSC_FALSE; 5855 5856 /* flexible pointer used in CUSP/CUSPARSE classes */ 5857 b->spptr = NULL; 5858 5859 #if defined(PETSC_HAVE_MUMPS) 5860 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5861 #endif 5862 #if defined(PETSC_HAVE_PASTIX) 5863 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5864 #endif 5865 #if defined(PETSC_HAVE_SUPERLU_DIST) 5866 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5867 #endif 5868 #if defined(PETSC_HAVE_CLIQUE) 5869 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5870 #endif 5871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5874 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5877 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5878 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5880 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5881 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5882 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5884 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5885 PetscFunctionReturn(0); 5886 } 5887 5888 #undef __FUNCT__ 5889 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5890 /*@ 5891 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5892 and "off-diagonal" part of the matrix in CSR format. 5893 5894 Collective on MPI_Comm 5895 5896 Input Parameters: 5897 + comm - MPI communicator 5898 . m - number of local rows (Cannot be PETSC_DECIDE) 5899 . n - This value should be the same as the local size used in creating the 5900 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5901 calculated if N is given) For square matrices n is almost always m. 5902 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5903 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5904 . i - row indices for "diagonal" portion of matrix 5905 . j - column indices 5906 . a - matrix values 5907 . oi - row indices for "off-diagonal" portion of matrix 5908 . oj - column indices 5909 - oa - matrix values 5910 5911 Output Parameter: 5912 . mat - the matrix 5913 5914 Level: advanced 5915 5916 Notes: 5917 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5918 must free the arrays once the matrix has been destroyed and not before. 5919 5920 The i and j indices are 0 based 5921 5922 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5923 5924 This sets local rows and cannot be used to set off-processor values. 5925 5926 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5927 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5928 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5929 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5930 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5931 communication if it is known that only local entries will be set. 5932 5933 .keywords: matrix, aij, compressed row, sparse, parallel 5934 5935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5936 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5937 @*/ 5938 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5939 { 5940 PetscErrorCode ierr; 5941 Mat_MPIAIJ *maij; 5942 5943 PetscFunctionBegin; 5944 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5945 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5946 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5947 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5948 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5949 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5950 maij = (Mat_MPIAIJ*) (*mat)->data; 5951 5952 (*mat)->preallocated = PETSC_TRUE; 5953 5954 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5955 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5956 5957 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5958 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5959 5960 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5961 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5962 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5963 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5964 5965 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5966 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5967 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5968 PetscFunctionReturn(0); 5969 } 5970 5971 /* 5972 Special version for direct calls from Fortran 5973 */ 5974 #include <petsc-private/fortranimpl.h> 5975 5976 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5977 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5978 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5979 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5980 #endif 5981 5982 /* Change these macros so can be used in void function */ 5983 #undef CHKERRQ 5984 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5985 #undef SETERRQ2 5986 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5987 #undef SETERRQ3 5988 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5989 #undef SETERRQ 5990 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5991 5992 #undef __FUNCT__ 5993 #define __FUNCT__ "matsetvaluesmpiaij_" 5994 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5995 { 5996 Mat mat = *mmat; 5997 PetscInt m = *mm, n = *mn; 5998 InsertMode addv = *maddv; 5999 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6000 PetscScalar value; 6001 PetscErrorCode ierr; 6002 6003 MatCheckPreallocated(mat,1); 6004 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6005 6006 #if defined(PETSC_USE_DEBUG) 6007 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6008 #endif 6009 { 6010 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6011 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6012 PetscBool roworiented = aij->roworiented; 6013 6014 /* Some Variables required in the macro */ 6015 Mat A = aij->A; 6016 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6017 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6018 MatScalar *aa = a->a; 6019 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6020 Mat B = aij->B; 6021 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6022 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6023 MatScalar *ba = b->a; 6024 6025 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6026 PetscInt nonew = a->nonew; 6027 MatScalar *ap1,*ap2; 6028 6029 PetscFunctionBegin; 6030 for (i=0; i<m; i++) { 6031 if (im[i] < 0) continue; 6032 #if defined(PETSC_USE_DEBUG) 6033 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6034 #endif 6035 if (im[i] >= rstart && im[i] < rend) { 6036 row = im[i] - rstart; 6037 lastcol1 = -1; 6038 rp1 = aj + ai[row]; 6039 ap1 = aa + ai[row]; 6040 rmax1 = aimax[row]; 6041 nrow1 = ailen[row]; 6042 low1 = 0; 6043 high1 = nrow1; 6044 lastcol2 = -1; 6045 rp2 = bj + bi[row]; 6046 ap2 = ba + bi[row]; 6047 rmax2 = bimax[row]; 6048 nrow2 = bilen[row]; 6049 low2 = 0; 6050 high2 = nrow2; 6051 6052 for (j=0; j<n; j++) { 6053 if (roworiented) value = v[i*n+j]; 6054 else value = v[i+j*m]; 6055 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 6056 if (in[j] >= cstart && in[j] < cend) { 6057 col = in[j] - cstart; 6058 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 6059 } else if (in[j] < 0) continue; 6060 #if defined(PETSC_USE_DEBUG) 6061 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6062 #endif 6063 else { 6064 if (mat->was_assembled) { 6065 if (!aij->colmap) { 6066 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6067 } 6068 #if defined(PETSC_USE_CTABLE) 6069 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6070 col--; 6071 #else 6072 col = aij->colmap[in[j]] - 1; 6073 #endif 6074 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6075 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6076 col = in[j]; 6077 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6078 B = aij->B; 6079 b = (Mat_SeqAIJ*)B->data; 6080 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6081 rp2 = bj + bi[row]; 6082 ap2 = ba + bi[row]; 6083 rmax2 = bimax[row]; 6084 nrow2 = bilen[row]; 6085 low2 = 0; 6086 high2 = nrow2; 6087 bm = aij->B->rmap->n; 6088 ba = b->a; 6089 } 6090 } else col = in[j]; 6091 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6092 } 6093 } 6094 } else if (!aij->donotstash) { 6095 if (roworiented) { 6096 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6097 } else { 6098 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6099 } 6100 } 6101 } 6102 } 6103 PetscFunctionReturnVoid(); 6104 } 6105 6106