#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
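/*
   A minimal usage sketch of the pattern recommended above: create an AIJ matrix and call both
   preallocation routines (the one that does not match the communicator size is simply ignored).
   The names comm, local_rows, local_cols and the nonzero estimates 5 and 2 are illustrative
   assumptions, not part of this file.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,local_rows,local_cols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... insert entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/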
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
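    The row distribution of the result is taken from the local row count m supplied by each
    process (gathered with MPI_Allgather below).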
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but it is fast to access.
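  For example, with garray = {3,7,9} the array version stores colmap[7] = 2, i.e. local
  column 1 shifted up by one, so that a looked-up value of 0 can mean the global column
  has no local entries.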
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && !mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. 
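     (All processes must agree on whether the off-diagonal block is stored with compacted
     local column indices before the communication scatter used by MatMult is rebuilt.)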
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscMPIInt size = mat->size; 748 PetscSF sf; 749 PetscInt *lrows; 750 PetscSFNode *rrows; 751 PetscInt lastidx = -1, r, p = 0, len = 0; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 /* Create SF where leaves are input rows and roots are owned rows */ 756 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 757 for (r = 0; r < n; ++r) lrows[r] = -1; 758 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 759 for (r = 0; r < N; ++r) { 760 const PetscInt idx = rows[r]; 761 PetscBool found = PETSC_FALSE; 762 /* Trick for efficient searching for sorted rows */ 763 if (lastidx > idx) p = 0; 764 lastidx = idx; 765 for (; p < size; ++p) { 766 if (idx >= owners[p] && idx < owners[p+1]) { 767 rrows[r].rank = p; 768 rrows[r].index = rows[r] - owners[p]; 769 found = PETSC_TRUE; 770 break; 771 } 772 } 773 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 774 } 775 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 776 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 777 /* Collect flags for rows to be zeroed */ 778 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 779 ierr = PetscSFReduceEnd(sf, 
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 780 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 781 /* Compress and put in row numbers */ 782 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 783 /* fix right hand side if needed */ 784 if (x && b) { 785 const PetscScalar *xx; 786 PetscScalar *bb; 787 788 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 789 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 790 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 791 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 793 } 794 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 795 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 796 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 823 { 824 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 825 PetscErrorCode ierr; 826 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 827 PetscInt i,j,r,m,p = 0,len = 0; 828 PetscInt *lrows,*owners = A->rmap->range; 829 PetscSFNode *rrows; 830 PetscSF sf; 831 const PetscScalar *xx; 832 PetscScalar *bb,*mask; 833 Vec xmask,lmask; 834 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 835 const PetscInt *aj, *ii,*ridx; 836 PetscScalar *aa; 837 #if defined(PETSC_DEBUG) 838 PetscBool found = PETSC_FALSE; 839 #endif 840 841 PetscFunctionBegin; 842 /* Create SF where leaves are input rows and roots are owned rows */ 843 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 844 for (r = 0; r < n; ++r) lrows[r] = -1; 845 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 846 for (r = 0; r < N; ++r) { 847 const PetscInt idx = rows[r]; 848 PetscBool found = PETSC_FALSE; 849 /* Trick for efficient searching for sorted rows */ 850 if (lastidx > idx) p = 0; 851 lastidx = idx; 852 for (; p < size; ++p) { 853 if (idx >= owners[p] && idx < owners[p+1]) { 854 rrows[r].rank = p; 855 rrows[r].index = rows[r] - owners[p]; 856 found = PETSC_TRUE; 857 break; 858 } 859 } 860 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 861 } 862 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 863 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 864 /* Collect flags for rows to be zeroed */ 865 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 866 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 867 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 872 /* handle off diagonal part of matrix */ 873 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 874 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 875 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 878 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 881 if (x) { 882 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 885 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 886 } 887 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 888 /* remove zeroed rows of off diagonal matrix */ 889 ii = aij->i; 890 for (i=0; i<len; i++) { 891 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 /* loop over all elements of off process part of matrix zeroing removed columns*/ 894 if (aij->compressedrow.use) { 895 m = aij->compressedrow.nrows; 896 ii = aij->compressedrow.i; 897 ridx = aij->compressedrow.rindex; 898 for (i=0; i<m; i++) { 899 n = ii[i+1] - ii[i]; 900 aj = aij->j + ii[i]; 901 aa = aij->a + ii[i]; 902 903 for (j=0; j<n; j++) { 904 if (PetscAbsScalar(mask[*aj])) { 905 if (b) bb[*ridx] -= *aa*xx[*aj]; 906 *aa = 0.0; 907 } 908 aa++; 909 aj++; 910 } 911 ridx++; 912 } 913 } else { /* do not use compressed row format */ 914 m = l->B->rmap->n; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 for (j=0; j<n; j++) { 920 if (PetscAbsScalar(mask[*aj])) { 921 if (b) bb[i] -= *aa*xx[*aj]; 922 *aa = 0.0; 923 } 924 aa++; 925 aj++; 926 } 927 } 928 } 929 if (x) { 930 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 932 } 933 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 934 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 935 ierr = PetscFree(lrows);CHKERRQ(ierr); 936 937 /* only change matrix nonzero state if pattern was allowed to be changed */ 938 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 939 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 940 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMult_MPIAIJ" 947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 948 { 949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 950 PetscErrorCode ierr; 951 PetscInt nt; 952 953 PetscFunctionBegin; 954 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 955 if (nt != A->cmap->n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 956 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 957 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 958 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMultAdd_MPIAIJ" 977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 985 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 993 { 994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 995 PetscErrorCode ierr; 996 PetscBool merged; 997 998 PetscFunctionBegin; 999 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1000 /* do nondiagonal part */ 1001 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1002 if (!merged) { 1003 /* send it on its way */ 1004 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* receive remote parts: note this assumes the values are not actually */ 1008 /* added in yy until the next line, */ 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 } else { 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* send it on its way */ 1014 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1015 /* values actually were received in the Begin() but we need to call this nop */ 1016 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 } 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1023 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1024 { 1025 MPI_Comm comm; 1026 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1027 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1028 IS Me,Notme; 1029 PetscErrorCode ierr; 1030 PetscInt M,N,first,last,*notme,i; 1031 PetscMPIInt size; 1032 1033 PetscFunctionBegin; 1034 /* Easy test: symmetric diagonal block */ 1035 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1036 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1037 if (!*f) PetscFunctionReturn(0); 1038 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1040 if (size == 1) PetscFunctionReturn(0); 1041 1042 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1043 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1044 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1045 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1046 for (i=0; i<first; i++) notme[i] = i; 1047 for (i=last; i<M; i++) notme[i-last+first] = i; 1048 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1049 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1050 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1051 Aoff = Aoffs[0]; 1052 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1053 Boff = Boffs[0]; 1054 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1055 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1056 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1057 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1058 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1059 ierr = PetscFree(notme);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1065 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 /* do nondiagonal part */ 1072 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1073 /* send it on its way */ 1074 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 /* do local part */ 1076 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1077 /* receive remote parts */ 1078 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /* 1083 This only works correctly for square matrices where the subblock A->A is the 1084 diagonal block 1085 */ 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1088 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1089 { 1090 PetscErrorCode ierr; 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 1093 PetscFunctionBegin; 1094 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1095 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1096 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatScale_MPIAIJ" 1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 1107 PetscFunctionBegin; 1108 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1109 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatDestroy_MPIAIJ" 1115 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1116 { 1117 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1118 PetscErrorCode ierr; 1119 1120 PetscFunctionBegin; 1121 #if defined(PETSC_USE_LOG) 1122 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1123 #endif 1124 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1125 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1126 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1127 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1128 #if defined(PETSC_USE_CTABLE) 1129 ierr = 
PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1130 #else 1131 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1132 #endif 1133 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1134 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1135 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1136 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1137 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1138 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1139 1140 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1154 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1155 { 1156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1157 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1158 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1159 PetscErrorCode ierr; 1160 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1161 int fd; 1162 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1163 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1164 PetscScalar *column_values; 1165 PetscInt message_count,flowcontrolcount; 1166 FILE *file; 1167 1168 PetscFunctionBegin; 1169 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1170 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1171 nz = A->nz + B->nz; 1172 if (!rank) { 1173 header[0] = MAT_FILE_CLASSID; 1174 header[1] = mat->rmap->N; 1175 header[2] = mat->cmap->N; 1176 1177 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1179 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1180 /* get largest number of rows any processor has */ 1181 rlen = mat->rmap->n; 1182 range = mat->rmap->range; 1183 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1184 } else { 1185 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1186 rlen = mat->rmap->n; 1187 } 1188 1189 /* load up the local row counts */ 1190 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1191 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1192 1193 /* store the row lengths to the file */ 1194 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1195 if (!rank) { 1196 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1197 for (i=1; i<size; i++) { 1198 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
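      /* receive the row lengths from process i and append them to the binary file */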
1199 rlen = range[i+1] - range[i]; 1200 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1201 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1202 } 1203 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1204 } else { 1205 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1206 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1207 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1208 } 1209 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1210 1211 /* load up the local column indices */ 1212 nzmax = nz; /* th processor needs space a largest processor needs */ 1213 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1214 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1215 cnt = 0; 1216 for (i=0; i<mat->rmap->n; i++) { 1217 for (j=B->i[i]; j<B->i[i+1]; j++) { 1218 if ((col = garray[B->j[j]]) > cstart) break; 1219 column_indices[cnt++] = col; 1220 } 1221 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1222 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1223 } 1224 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1225 1226 /* store the column indices to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 MPI_Status status; 1230 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1231 for (i=1; i<size; i++) { 1232 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1233 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1234 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1235 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1237 } 1238 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1241 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1242 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1243 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1244 } 1245 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1246 1247 /* load up the local column values */ 1248 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1249 cnt = 0; 1250 for (i=0; i<mat->rmap->n; i++) { 1251 for (j=B->i[i]; j<B->i[i+1]; j++) { 1252 if (garray[B->j[j]] > cstart) break; 1253 column_values[cnt++] = B->a[j]; 1254 } 1255 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1256 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1257 } 1258 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1259 1260 /* store the column values to the file */ 1261 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1262 if (!rank) { 1263 MPI_Status status; 
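    /* rank 0 writes its own column values first, then receives and writes those of each remote process */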
1264 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1265 for (i=1; i<size; i++) { 1266 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1267 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1268 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1269 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1271 } 1272 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1273 } else { 1274 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1275 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1277 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1278 } 1279 ierr = PetscFree(column_values);CHKERRQ(ierr); 1280 1281 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1282 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #include <petscdraw.h> 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1289 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 PetscErrorCode ierr; 1293 PetscMPIInt rank = aij->rank,size = aij->size; 1294 PetscBool isdraw,iascii,isbinary; 1295 PetscViewer sviewer; 1296 PetscViewerFormat format; 1297 1298 PetscFunctionBegin; 1299 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1300 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1301 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1302 if (iascii) { 1303 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1304 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1305 MatInfo info; 1306 PetscBool inodes; 1307 1308 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1309 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1310 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1312 if (!inodes) { 1313 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1314 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1315 } else { 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1317 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1318 } 1319 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1321 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1323 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1326 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1329 PetscInt inodecount,inodelimit,*inodes; 1330 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1331 if (inodes) { 1332 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1333 } else { 1334 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1335 } 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1338 PetscFunctionReturn(0); 1339 } 1340 } else if (isbinary) { 1341 if (size == 1) { 1342 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1343 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1344 } else { 1345 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } else if (isdraw) { 1349 PetscDraw draw; 1350 PetscBool isnull; 1351 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1352 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1353 } 1354 1355 { 1356 /* assemble the entire matrix onto first processor. */ 1357 Mat A; 1358 Mat_SeqAIJ *Aloc; 1359 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1360 MatScalar *a; 1361 1362 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1363 if (!rank) { 1364 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1365 } else { 1366 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1367 } 1368 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1369 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1370 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1371 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1372 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1373 1374 /* copy over the A part */ 1375 Aloc = (Mat_SeqAIJ*)aij->A->data; 1376 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1377 row = mat->rmap->rstart; 1378 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1379 for (i=0; i<m; i++) { 1380 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1381 row++; 1382 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1383 } 1384 aj = Aloc->j; 1385 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1386 1387 /* copy over the B part */ 1388 Aloc = (Mat_SeqAIJ*)aij->B->data; 1389 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1390 row = mat->rmap->rstart; 1391 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1392 ct = cols; 1393 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1394 for (i=0; i<m; i++) { 1395 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1396 row++; 1397 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 ierr = PetscFree(ct);CHKERRQ(ierr); 1400 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 /* 1403 Everyone has to call to draw the matrix since the graphics waits are 1404 synchronized across all processors that share the 
PetscDraw object 1405 */ 1406 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1407 if (!rank) { 1408 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1409 } 1410 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1411 ierr = MatDestroy(&A);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatView_MPIAIJ" 1418 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1419 { 1420 PetscErrorCode ierr; 1421 PetscBool iascii,isdraw,issocket,isbinary; 1422 1423 PetscFunctionBegin; 1424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1425 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1426 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1427 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1428 if (iascii || isdraw || isbinary || issocket) { 1429 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSOR_MPIAIJ" 1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1437 { 1438 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1439 PetscErrorCode ierr; 1440 Vec bb1 = 0; 1441 PetscBool hasop; 1442 1443 PetscFunctionBegin; 1444 if (flag == SOR_APPLY_UPPER) { 1445 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1450 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1451 } 1452 1453 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1456 its--; 1457 } 1458 1459 while (its--) { 1460 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 1463 /* update rhs: bb1 = bb - B*x */ 1464 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1465 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1466 1467 /* local sweep */ 1468 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1469 } 1470 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1471 if (flag & SOR_ZERO_INITIAL_GUESS) { 1472 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1473 its--; 1474 } 1475 while (its--) { 1476 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 1479 /* update rhs: bb1 = bb - B*x */ 1480 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1481 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1482 1483 /* local sweep */ 1484 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1485 } 1486 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1487 if (flag & SOR_ZERO_INITIAL_GUESS) { 1488 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1489 its--; 1490 } 1491 while (its--) { 1492 ierr = 
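             /* refresh the ghost values of xx so that the off-diagonal product in
                bb1 = bb - B*x below uses the current iterate */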
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 1495 /* update rhs: bb1 = bb - B*x */ 1496 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1498 1499 /* local sweep */ 1500 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1501 } 1502 } else if (flag & SOR_EISENSTAT) { 1503 Vec xx1; 1504 1505 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1506 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1507 1508 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 if (!mat->diag) { 1511 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1512 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1513 } 1514 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1515 if (hasop) { 1516 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1517 } else { 1518 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1519 } 1520 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1521 1522 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1526 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1527 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1528 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1529 1530 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatPermute_MPIAIJ" 1536 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1537 { 1538 Mat aA,aB,Aperm; 1539 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1540 PetscScalar *aa,*ba; 1541 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1542 PetscSF rowsf,sf; 1543 IS parcolp = NULL; 1544 PetscBool done; 1545 PetscErrorCode ierr; 1546 1547 PetscFunctionBegin; 1548 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1549 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1550 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1551 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1552 1553 /* Invert row permutation to find out where my rows should go */ 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1557 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1558 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1560 1561 /* Invert column permutation to find out where my columns should go */ 1562 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1563 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1564 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1565 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1566 ierr = 
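      /* As with the rows above, the reduce with MPIU_REPLACE inverts the permutation
         (roughly speaking): leaf i carries the new global index cstart+i of new column i
         and is attached to the old column cwant[i] it pulls from, so after the reduce
         cdest[j] holds the new global index that this process's old local column j maps
         to.  Since colp is a permutation, each root receives exactly one leaf, which is
         why MPIU_REPLACE is safe.  Illustrative example: if new column 0 is taken from
         old column 5, the owner of old column 5 ends up with cdest[5-cstart] = 0. */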
PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1569 1570 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1571 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1572 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1573 1574 /* Find out where my gcols should go */ 1575 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1576 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1577 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1578 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1579 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1580 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1581 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1583 1584 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1585 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1586 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1587 for (i=0; i<m; i++) { 1588 PetscInt row = rdest[i],rowner; 1589 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1590 for (j=ai[i]; j<ai[i+1]; j++) { 1591 PetscInt cowner,col = cdest[aj[j]]; 1592 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1593 if (rowner == cowner) dnnz[i]++; 1594 else onnz[i]++; 1595 } 1596 for (j=bi[i]; j<bi[i+1]; j++) { 1597 PetscInt cowner,col = gcdest[bj[j]]; 1598 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1599 if (rowner == cowner) dnnz[i]++; 1600 else onnz[i]++; 1601 } 1602 } 1603 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1604 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1608 1609 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1614 PetscInt j0,rowlen; 1615 rowlen = ai[i+1] - ai[i]; 1616 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1617 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1618 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1619 } 1620 rowlen = bi[i+1] - bi[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 } 1626 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1627 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1629 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1630 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1631 ierr = 
MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1632 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1633 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1634 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1635 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1636 *B = Aperm; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1643 { 1644 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1645 Mat A = mat->A,B = mat->B; 1646 PetscErrorCode ierr; 1647 PetscReal isend[5],irecv[5]; 1648 1649 PetscFunctionBegin; 1650 info->block_size = 1.0; 1651 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1654 isend[3] = info->memory; isend[4] = info->mallocs; 1655 1656 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1657 1658 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1659 isend[3] += info->memory; isend[4] += info->mallocs; 1660 if (flag == MAT_LOCAL) { 1661 info->nz_used = isend[0]; 1662 info->nz_allocated = isend[1]; 1663 info->nz_unneeded = isend[2]; 1664 info->memory = isend[3]; 1665 info->mallocs = isend[4]; 1666 } else if (flag == MAT_GLOBAL_MAX) { 1667 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1668 1669 info->nz_used = irecv[0]; 1670 info->nz_allocated = irecv[1]; 1671 info->nz_unneeded = irecv[2]; 1672 info->memory = irecv[3]; 1673 info->mallocs = irecv[4]; 1674 } else if (flag == MAT_GLOBAL_SUM) { 1675 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1676 1677 info->nz_used = irecv[0]; 1678 info->nz_allocated = irecv[1]; 1679 info->nz_unneeded = irecv[2]; 1680 info->memory = irecv[3]; 1681 info->mallocs = irecv[4]; 1682 } 1683 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1684 info->fill_ratio_needed = 0; 1685 info->factor_mallocs = 0; 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatSetOption_MPIAIJ" 1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1692 { 1693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 switch (op) { 1698 case MAT_NEW_NONZERO_LOCATIONS: 1699 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1700 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1701 case MAT_KEEP_NONZERO_PATTERN: 1702 case MAT_NEW_NONZERO_LOCATION_ERR: 1703 case MAT_USE_INODES: 1704 case MAT_IGNORE_ZERO_ENTRIES: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_ROW_ORIENTED: 1710 a->roworiented = flg; 1711 1712 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1713 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1714 break; 1715 case MAT_NEW_DIAGONALS: 1716 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1717 break; 1718 case MAT_IGNORE_OFF_PROC_ENTRIES: 1719 a->donotstash = flg; 1720 break; 1721 case MAT_SPD: 1722 A->spd_set = PETSC_TRUE; 1723 A->spd = flg; 1724 if (flg) { 1725 A->symmetric = PETSC_TRUE; 1726 A->structurally_symmetric = PETSC_TRUE; 1727 A->symmetric_set = PETSC_TRUE; 1728 A->structurally_symmetric_set = PETSC_TRUE; 1729 } 1730 break; 1731 case MAT_SYMMETRIC: 1732 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1733 break; 1734 case 
MAT_STRUCTURALLY_SYMMETRIC: 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_HERMITIAN: 1738 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_SYMMETRY_ETERNAL: 1741 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1742 break; 1743 default: 1744 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatGetRow_MPIAIJ" 1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1755 PetscErrorCode ierr; 1756 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1757 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1758 PetscInt *cmap,*idx_p; 1759 1760 PetscFunctionBegin; 1761 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1762 mat->getrowactive = PETSC_TRUE; 1763 1764 if (!mat->rowvalues && (idx || v)) { 1765 /* 1766 allocate enough space to hold information from the longest row. 1767 */ 1768 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1769 PetscInt max = 1,tmp; 1770 for (i=0; i<matin->rmap->n; i++) { 1771 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1772 if (max < tmp) max = tmp; 1773 } 1774 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1775 } 1776 1777 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1778 lrow = row - rstart; 1779 1780 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1781 if (!v) {pvA = 0; pvB = 0;} 1782 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1783 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1784 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1785 nztot = nzA + nzB; 1786 1787 cmap = mat->garray; 1788 if (v || idx) { 1789 if (nztot) { 1790 /* Sort by increasing column numbers, assuming A and B already sorted */ 1791 PetscInt imark = -1; 1792 if (v) { 1793 *v = v_p = mat->rowvalues; 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1796 else break; 1797 } 1798 imark = i; 1799 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1800 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1801 } 1802 if (idx) { 1803 *idx = idx_p = mat->rowindices; 1804 if (imark > -1) { 1805 for (i=0; i<imark; i++) { 1806 idx_p[i] = cmap[cworkB[i]]; 1807 } 1808 } else { 1809 for (i=0; i<nzB; i++) { 1810 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1811 else break; 1812 } 1813 imark = i; 1814 } 1815 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1816 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1817 } 1818 } else { 1819 if (idx) *idx = 0; 1820 if (v) *v = 0; 1821 } 1822 } 1823 *nz = nztot; 1824 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1825 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1834 1835 PetscFunctionBegin; 1836 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1837 
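      /* Nothing is deallocated here: the rowvalues/rowindices work arrays obtained in
         MatGetRow_MPIAIJ() are kept for subsequent calls and released only when the
         matrix is destroyed.  Caller-side pairing, as a minimal illustrative sketch
         (A, rstart, rend etc. are placeholder names):

           PetscInt          rstart,rend,row,ncols;
           const PetscInt    *cols;
           const PetscScalar *vals;
           ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
           for (row=rstart; row<rend; row++) {
             ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
             ... use ncols, cols, vals for the locally owned row ...
             ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
           }
      */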
aij->getrowactive = PETSC_FALSE; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatNorm_MPIAIJ" 1843 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1844 { 1845 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1846 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1847 PetscErrorCode ierr; 1848 PetscInt i,j,cstart = mat->cmap->rstart; 1849 PetscReal sum = 0.0; 1850 MatScalar *v; 1851 1852 PetscFunctionBegin; 1853 if (aij->size == 1) { 1854 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1855 } else { 1856 if (type == NORM_FROBENIUS) { 1857 v = amat->a; 1858 for (i=0; i<amat->nz; i++) { 1859 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1860 } 1861 v = bmat->a; 1862 for (i=0; i<bmat->nz; i++) { 1863 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1864 } 1865 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1866 *norm = PetscSqrtReal(*norm); 1867 } else if (type == NORM_1) { /* max column norm */ 1868 PetscReal *tmp,*tmp2; 1869 PetscInt *jj,*garray = aij->garray; 1870 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1871 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1872 *norm = 0.0; 1873 v = amat->a; jj = amat->j; 1874 for (j=0; j<amat->nz; j++) { 1875 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1876 } 1877 v = bmat->a; jj = bmat->j; 1878 for (j=0; j<bmat->nz; j++) { 1879 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1880 } 1881 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1882 for (j=0; j<mat->cmap->N; j++) { 1883 if (tmp2[j] > *norm) *norm = tmp2[j]; 1884 } 1885 ierr = PetscFree(tmp);CHKERRQ(ierr); 1886 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j=0; j<aij->A->rmap->n; j++) { 1890 v = amat->a + amat->i[j]; 1891 sum = 0.0; 1892 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a + bmat->i[j]; 1896 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1897 sum += PetscAbsScalar(*v); v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatTranspose_MPIAIJ" 1909 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1910 { 1911 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1912 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1913 PetscErrorCode ierr; 1914 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1915 PetscInt cstart = A->cmap->rstart,ncol; 1916 Mat B; 1917 MatScalar *array; 1918 1919 PetscFunctionBegin; 1920 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1921 1922 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1923 ai = Aloc->i; aj = Aloc->j; 1924 bi = Bloc->i; bj = Bloc->j; 1925 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1926 PetscInt *d_nnz,*g_nnz,*o_nnz; 1927 PetscSFNode *oloc; 1928 PETSC_UNUSED PetscSF sf; 1929 1930 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1931 /* compute d_nnz for 
preallocation */ 1932 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (i=0; i<ai[ma]; i++) { 1934 d_nnz[aj[i]]++; 1935 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1936 } 1937 /* compute local off-diagonal contributions */ 1938 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1940 /* map those to global */ 1941 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1942 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1943 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1944 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1945 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1947 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1948 1949 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1950 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1951 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1952 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1953 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1954 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1955 } else { 1956 B = *matout; 1957 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1959 } 1960 1961 /* copy over the A part */ 1962 array = Aloc->a; 1963 row = A->rmap->rstart; 1964 for (i=0; i<ma; i++) { 1965 ncol = ai[i+1]-ai[i]; 1966 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1967 row++; 1968 array += ncol; aj += ncol; 1969 } 1970 aj = Aloc->j; 1971 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1972 1973 /* copy over the B part */ 1974 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1975 array = Bloc->a; 1976 row = A->rmap->rstart; 1977 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1978 cols_tmp = cols; 1979 for (i=0; i<mb; i++) { 1980 ncol = bi[i+1]-bi[i]; 1981 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1982 row++; 1983 array += ncol; cols_tmp += ncol; 1984 } 1985 ierr = PetscFree(cols);CHKERRQ(ierr); 1986 1987 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1990 *matout = B; 1991 } else { 1992 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 Mat a = aij->A,b = aij->B; 2003 PetscErrorCode ierr; 2004 PetscInt s1,s2,s3; 2005 2006 PetscFunctionBegin; 2007 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2008 if (rr) { 2009 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2010 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2011 /* Overlap communication with computation. 
*/ 2012 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 } 2014 if (ll) { 2015 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2016 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2017 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2018 } 2019 /* scale the diagonal block */ 2020 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2021 2022 if (rr) { 2023 /* Do a scatter end and then right scale the off-diagonal block */ 2024 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2032 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2033 { 2034 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatEqual_MPIAIJ" 2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2045 { 2046 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2047 Mat a,b,c,d; 2048 PetscBool flg; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 a = matA->A; b = matA->B; 2053 c = matB->A; d = matB->B; 2054 2055 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2056 if (flg) { 2057 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2058 } 2059 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatCopy_MPIAIJ" 2065 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2066 { 2067 PetscErrorCode ierr; 2068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2069 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2070 2071 PetscFunctionBegin; 2072 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2073 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2074 /* because of the column compression in the off-processor part of the matrix a->B, 2075 the number of columns in a->B and b->B may be different, hence we cannot call 2076 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2077 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2078 then copying the submatrices */ 2079 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2080 } else { 2081 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2082 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSetUp_MPIAIJ" 2089 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2090 { 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2102 { 2103 PetscInt i,m=Y->rmap->N; 2104 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2105 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2106 const PetscInt *xi = x->i,*yi = y->i; 2107 2108 PetscFunctionBegin; 2109 /* Set the number of nonzeros in the new matrix */ 2110 for (i=0; i<m; i++) { 2111 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2112 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2113 nnz[i] = 0; 2114 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2115 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2116 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2117 nnz[i]++; 2118 } 2119 for (; k<nzy; k++) nnz[i]++; 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatAXPY_MPIAIJ" 2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2127 { 2128 PetscErrorCode ierr; 2129 PetscInt i; 2130 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2131 PetscBLASInt bnz,one=1; 2132 Mat_SeqAIJ *x,*y; 2133 2134 PetscFunctionBegin; 2135 if (str == SAME_NONZERO_PATTERN) { 2136 PetscScalar alpha = a; 2137 x = (Mat_SeqAIJ*)xx->A->data; 2138 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2139 y = (Mat_SeqAIJ*)yy->A->data; 2140 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2141 x = (Mat_SeqAIJ*)xx->B->data; 2142 y = (Mat_SeqAIJ*)yy->B->data; 2143 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2144 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2145 } else if (str == SUBSET_NONZERO_PATTERN) { 2146 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2147 2148 x = (Mat_SeqAIJ*)xx->B->data; 2149 y = (Mat_SeqAIJ*)yy->B->data; 2150 if (y->xtoy && y->XtoY != xx->B) { 2151 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2152 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2153 } 2154 if (!y->xtoy) { /* get xtoy */ 2155 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2156 y->XtoY = xx->B; 2157 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2158 } 2159 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2160 } else { 2161 Mat B; 2162 PetscInt *nnz_d,*nnz_o; 2163 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2164 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2165 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2166 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2167 ierr = 
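           /* General (different nonzero pattern) case: B is given the union sparsity
              pattern of Y and a*X, using the per-row counts computed by
              MatAXPYGetPreallocation_SeqAIJ() for the diagonal block and
              MatAXPYGetPreallocation_MPIAIJ() for the off-diagonal block; the sum is
              then formed entry-by-entry by MatAXPY_BasicWithPreallocation() and
              MatHeaderReplace() makes the result take over Y in place. */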
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2168 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2169 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2170 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2171 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2172 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2173 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2174 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2175 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2176 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "MatConjugate_MPIAIJ" 2185 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2186 { 2187 #if defined(PETSC_USE_COMPLEX) 2188 PetscErrorCode ierr; 2189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2190 2191 PetscFunctionBegin; 2192 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2193 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2194 #else 2195 PetscFunctionBegin; 2196 #endif 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatRealPart_MPIAIJ" 2202 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2209 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2215 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2216 { 2217 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2222 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #if defined(PETSC_HAVE_PBGL) 2227 2228 #include <boost/parallel/mpi/bsp_process_group.hpp> 2229 #include <boost/graph/distributed/ilu_default_graph.hpp> 2230 #include <boost/graph/distributed/ilu_0_block.hpp> 2231 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2232 #include <boost/graph/distributed/petsc/interface.hpp> 2233 #include <boost/multi_array.hpp> 2234 #include <boost/parallel/distributed_property_map->hpp> 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2238 /* 2239 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2240 */ 2241 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2242 { 2243 namespace petsc = boost::distributed::petsc; 2244 2245 namespace graph_dist = boost::graph::distributed; 2246 using boost::graph::distributed::ilu_default::process_group_type; 2247 using boost::graph::ilu_permuted; 2248 2249 PetscBool row_identity, col_identity; 2250 PetscContainer c; 2251 PetscInt m, n, M, N; 2252 PetscErrorCode ierr; 2253 2254 PetscFunctionBegin; 2255 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2256 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2257 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2258 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2259 2260 process_group_type pg; 2261 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2262 lgraph_type *lgraph_p = 
new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2263 lgraph_type& level_graph = *lgraph_p; 2264 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2265 2266 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2267 ilu_permuted(level_graph); 2268 2269 /* put together the new matrix */ 2270 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2271 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2272 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2273 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2274 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2275 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2276 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2277 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2278 2279 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2280 ierr = PetscContainerSetPointer(c, lgraph_p); 2281 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2282 ierr = PetscContainerDestroy(&c); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2288 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2289 { 2290 PetscFunctionBegin; 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatSolve_MPIAIJ" 2296 /* 2297 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2298 */ 2299 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2300 { 2301 namespace graph_dist = boost::graph::distributed; 2302 2303 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2304 lgraph_type *lgraph_p; 2305 PetscContainer c; 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2310 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2311 ierr = VecCopy(b, x);CHKERRQ(ierr); 2312 2313 PetscScalar *array_x; 2314 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2315 PetscInt sx; 2316 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2317 2318 PetscScalar *array_b; 2319 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2320 PetscInt sb; 2321 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2322 2323 lgraph_type& level_graph = *lgraph_p; 2324 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2325 2326 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2327 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2328 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2329 2330 typedef boost::iterator_property_map<array_ref_type::iterator, 2331 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2332 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2333 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2334 2335 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2336 PetscFunctionReturn(0); 2337 } 2338 #endif 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatDestroy_MatRedundant" 2342 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2343 { 2344 PetscErrorCode ierr; 2345 Mat_Redundant *redund; 2346 PetscInt i; 2347 PetscMPIInt size; 2348 2349 PetscFunctionBegin; 2350 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2351 if (size == 1) { 2352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2353 redund = a->redundant; 2354 } else { 
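      /* more than one process: the Mat_Redundant context was attached to the
         MPIAIJ data structure rather than to a SeqAIJ matrix */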
2355 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2356 redund = a->redundant; 2357 } 2358 if (redund){ 2359 if (redund->matseq) { /* via MatGetSubMatrices() */ 2360 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2361 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2362 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2363 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2364 } else { 2365 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2366 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2367 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2368 for (i=0; i<redund->nrecvs; i++) { 2369 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2370 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2371 } 2372 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2373 } 2374 2375 if (redund->psubcomm) { 2376 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2377 } 2378 ierr = redund->Destroy(A);CHKERRQ(ierr); 2379 ierr = PetscFree(redund);CHKERRQ(ierr); 2380 } 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2387 { 2388 PetscMPIInt rank,size; 2389 MPI_Comm comm; 2390 PetscErrorCode ierr; 2391 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2392 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2393 PetscInt *rowrange = mat->rmap->range; 2394 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2395 Mat A = aij->A,B=aij->B,C=*matredundant; 2396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2397 PetscScalar *sbuf_a; 2398 PetscInt nzlocal=a->nz+b->nz; 2399 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2400 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2401 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2402 MatScalar *aworkA,*aworkB; 2403 PetscScalar *vals; 2404 PetscMPIInt tag1,tag2,tag3,imdex; 2405 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2406 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2407 MPI_Status recv_status,*send_status; 2408 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2409 PetscInt **rbuf_j=NULL; 2410 PetscScalar **rbuf_a=NULL; 2411 Mat_Redundant *redund =NULL; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2415 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2417 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2418 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2419 2420 if (reuse == MAT_REUSE_MATRIX) { 2421 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2422 if (subsize == 1) { 2423 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2424 redund = c->redundant; 2425 } else { 2426 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2427 redund = c->redundant; 2428 } 2429 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2430 2431 nsends = redund->nsends; 2432 nrecvs = redund->nrecvs; 2433 send_rank = redund->send_rank; 2434 recv_rank = redund->recv_rank; 2435 sbuf_nz = redund->sbuf_nz; 2436 rbuf_nz = redund->rbuf_nz; 2437 sbuf_j = redund->sbuf_j; 2438 sbuf_a = redund->sbuf_a; 2439 rbuf_j = redund->rbuf_j; 2440 rbuf_a = redund->rbuf_a; 2441 } 2442 2443 if (reuse == MAT_INITIAL_MATRIX) { 2444 PetscInt nleftover,np_subcomm; 2445 2446 /* get the destination processors' id send_rank, nsends and nrecvs */ 2447 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2448 2449 np_subcomm = size/nsubcomm; 2450 nleftover = size - nsubcomm*np_subcomm; 2451 2452 /* block of codes below is specific for INTERLACED */ 2453 /* ------------------------------------------------*/ 2454 nsends = 0; nrecvs = 0; 2455 for (i=0; i<size; i++) { 2456 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2457 send_rank[nsends++] = i; 2458 recv_rank[nrecvs++] = i; 2459 } 2460 } 2461 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2462 i = size-nleftover-1; 2463 j = 0; 2464 while (j < nsubcomm - nleftover) { 2465 send_rank[nsends++] = i; 2466 i--; j++; 2467 } 2468 } 2469 2470 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2471 for (i=0; i<nleftover; i++) { 2472 recv_rank[nrecvs++] = size-nleftover+i; 2473 } 2474 } 2475 /*----------------------------------------------*/ 2476 2477 /* allocate sbuf_j, sbuf_a */ 2478 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2479 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2480 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2481 /* 2482 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2483 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2484 */ 2485 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2486 2487 /* copy mat's local entries into the buffers */ 2488 if (reuse == MAT_INITIAL_MATRIX) { 2489 rownz_max = 0; 2490 rptr = sbuf_j; 2491 cols = sbuf_j + rend-rstart + 1; 2492 vals = sbuf_a; 2493 rptr[0] = 0; 2494 for (i=0; i<rend-rstart; i++) { 2495 row = i + rstart; 2496 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2497 ncols = nzA + nzB; 2498 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2499 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2500 /* load the column indices for this row into cols */ 2501 lwrite = 0; 2502 for (l=0; l<nzB; l++) { 2503 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2504 vals[lwrite] = aworkB[l]; 2505 cols[lwrite++] = ctmp; 2506 } 2507 } 2508 for (l=0; l<nzA; l++) { 2509 vals[lwrite] = aworkA[l]; 2510 cols[lwrite++] = cstart + cworkA[l]; 2511 } 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 vals += ncols; 2519 cols += ncols; 2520 rptr[i+1] = rptr[i] + ncols; 2521 if (rownz_max < ncols) rownz_max = ncols; 2522 } 2523 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2524 } else { /* only copy matrix values into sbuf_a */ 2525 rptr = sbuf_j; 2526 vals = sbuf_a; 2527 rptr[0] = 0; 2528 for (i=0; i<rend-rstart; i++) { 2529 row = i + rstart; 2530 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2531 ncols = nzA + nzB; 2532 cworkB = b->j + b->i[i]; 2533 aworkA = a->a + a->i[i]; 2534 aworkB = b->a + b->i[i]; 2535 lwrite = 0; 2536 for (l=0; 
l<nzB; l++) { 2537 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2538 } 2539 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2540 for (l=0; l<nzB; l++) { 2541 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2542 } 2543 vals += ncols; 2544 rptr[i+1] = rptr[i] + ncols; 2545 } 2546 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2547 2548 /* send nzlocal to others, and recv other's nzlocal */ 2549 /*--------------------------------------------------*/ 2550 if (reuse == MAT_INITIAL_MATRIX) { 2551 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2552 2553 s_waits2 = s_waits3 + nsends; 2554 s_waits1 = s_waits2 + nsends; 2555 r_waits1 = s_waits1 + nsends; 2556 r_waits2 = r_waits1 + nrecvs; 2557 r_waits3 = r_waits2 + nrecvs; 2558 } else { 2559 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2560 2561 r_waits3 = s_waits3 + nsends; 2562 } 2563 2564 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2565 if (reuse == MAT_INITIAL_MATRIX) { 2566 /* get new tags to keep the communication clean */ 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2568 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2569 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2570 2571 /* post receives of other's nzlocal */ 2572 for (i=0; i<nrecvs; i++) { 2573 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* send nzlocal to others */ 2576 for (i=0; i<nsends; i++) { 2577 sbuf_nz[i] = nzlocal; 2578 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2579 } 2580 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2581 count = nrecvs; 2582 while (count) { 2583 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2584 2585 recv_rank[imdex] = recv_status.MPI_SOURCE; 2586 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2587 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2588 2589 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2590 2591 rbuf_nz[imdex] += i + 2; 2592 2593 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2594 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2595 count--; 2596 } 2597 /* wait on sends of nzlocal */ 2598 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2599 /* send mat->i,j to others, and recv from other's */ 2600 /*------------------------------------------------*/ 2601 for (i=0; i<nsends; i++) { 2602 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2603 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2604 } 2605 /* wait on receives of mat->i,j */ 2606 /*------------------------------*/ 2607 count = nrecvs; 2608 while (count) { 2609 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2610 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2611 count--; 2612 } 2613 /* wait on sends of mat->i,j */ 2614 /*---------------------------*/ 2615 if (nsends) { 2616 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2617 } 2618 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2619 2620 /* post 
receives, send and receive mat->a */ 2621 /*----------------------------------------*/ 2622 for (imdex=0; imdex<nrecvs; imdex++) { 2623 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2624 } 2625 for (i=0; i<nsends; i++) { 2626 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2627 } 2628 count = nrecvs; 2629 while (count) { 2630 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2631 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2632 count--; 2633 } 2634 if (nsends) { 2635 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2636 } 2637 2638 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2639 2640 /* create redundant matrix */ 2641 /*-------------------------*/ 2642 if (reuse == MAT_INITIAL_MATRIX) { 2643 const PetscInt *range; 2644 PetscInt rstart_sub,rend_sub,mloc_sub; 2645 2646 /* compute rownz_max for preallocation */ 2647 for (imdex=0; imdex<nrecvs; imdex++) { 2648 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2649 rptr = rbuf_j[imdex]; 2650 for (i=0; i<j; i++) { 2651 ncols = rptr[i+1] - rptr[i]; 2652 if (rownz_max < ncols) rownz_max = ncols; 2653 } 2654 } 2655 2656 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2657 2658 /* get local size of redundant matrix 2659 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2660 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2661 rstart_sub = range[nsubcomm*subrank]; 2662 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2663 rend_sub = range[nsubcomm*(subrank+1)]; 2664 } else { 2665 rend_sub = mat->rmap->N; 2666 } 2667 mloc_sub = rend_sub - rstart_sub; 2668 2669 if (M == N) { 2670 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2671 } else { /* non-square matrix */ 2672 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2673 } 2674 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2675 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2676 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2677 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2678 } else { 2679 C = *matredundant; 2680 } 2681 2682 /* insert local matrix entries */ 2683 rptr = sbuf_j; 2684 cols = sbuf_j + rend-rstart + 1; 2685 vals = sbuf_a; 2686 for (i=0; i<rend-rstart; i++) { 2687 row = i + rstart; 2688 ncols = rptr[i+1] - rptr[i]; 2689 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 vals += ncols; 2691 cols += ncols; 2692 } 2693 /* insert received matrix entries */ 2694 for (imdex=0; imdex<nrecvs; imdex++) { 2695 rstart = rowrange[recv_rank[imdex]]; 2696 rend = rowrange[recv_rank[imdex]+1]; 2697 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2698 rptr = rbuf_j[imdex]; 2699 cols = rbuf_j[imdex] + rend-rstart + 1; 2700 vals = rbuf_a[imdex]; 2701 for (i=0; i<rend-rstart; i++) { 2702 row = i + rstart; 2703 ncols = rptr[i+1] - rptr[i]; 2704 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2705 vals += ncols; 2706 cols += ncols; 2707 } 2708 } 2709 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 2712 if (reuse == MAT_INITIAL_MATRIX) { 2713 *matredundant = C; 2714 2715 /* create a 
supporting struct and attach it to C for reuse */ 2716 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2717 if (subsize == 1) { 2718 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2719 c->redundant = redund; 2720 } else { 2721 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2722 c->redundant = redund; 2723 } 2724 2725 redund->nzlocal = nzlocal; 2726 redund->nsends = nsends; 2727 redund->nrecvs = nrecvs; 2728 redund->send_rank = send_rank; 2729 redund->recv_rank = recv_rank; 2730 redund->sbuf_nz = sbuf_nz; 2731 redund->rbuf_nz = rbuf_nz; 2732 redund->sbuf_j = sbuf_j; 2733 redund->sbuf_a = sbuf_a; 2734 redund->rbuf_j = rbuf_j; 2735 redund->rbuf_a = rbuf_a; 2736 redund->psubcomm = NULL; 2737 2738 redund->Destroy = C->ops->destroy; 2739 C->ops->destroy = MatDestroy_MatRedundant; 2740 } 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2746 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2747 { 2748 PetscErrorCode ierr; 2749 MPI_Comm comm; 2750 PetscMPIInt size,subsize; 2751 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2752 Mat_Redundant *redund=NULL; 2753 PetscSubcomm psubcomm=NULL; 2754 MPI_Comm subcomm_in=subcomm; 2755 Mat *matseq; 2756 IS isrow,iscol; 2757 2758 PetscFunctionBegin; 2759 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2760 if (reuse == MAT_INITIAL_MATRIX) { 2761 /* create psubcomm, then get subcomm */ 2762 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2763 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2764 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2765 2766 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2767 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2768 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2769 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2770 subcomm = psubcomm->comm; 2771 } else { /* retrieve psubcomm and subcomm */ 2772 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2773 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2774 if (subsize == 1) { 2775 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2776 redund = c->redundant; 2777 } else { 2778 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2779 redund = c->redundant; 2780 } 2781 psubcomm = redund->psubcomm; 2782 } 2783 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2784 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2785 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2786 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2787 if (subsize == 1) { 2788 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2789 c->redundant->psubcomm = psubcomm; 2790 } else { 2791 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2792 c->redundant->psubcomm = psubcomm ; 2793 } 2794 } 2795 PetscFunctionReturn(0); 2796 } 2797 } 2798 2799 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2800 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2801 if (reuse == MAT_INITIAL_MATRIX) { 2802 /* create a local sequential matrix matseq[0] */ 2803 mloc_sub = PETSC_DECIDE; 2804 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2805 ierr = 
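           /* MPI_Scan() with MPI_SUM returns the inclusive prefix sum of the local sizes,
              i.e. this process's rend within the subcommunicator; rstart = rend - mloc_sub
              then gives each process a contiguous row range.  For example, with three
              subcommunicator processes owning 4, 3 and 3 rows, the scan yields
              rend = 4, 7, 10 and hence rstart = 0, 4, 7. */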
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2806 rstart = rend - mloc_sub; 2807 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2808 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2809 } else { /* reuse == MAT_REUSE_MATRIX */ 2810 if (subsize == 1) { 2811 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2812 redund = c->redundant; 2813 } else { 2814 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2815 redund = c->redundant; 2816 } 2817 2818 isrow = redund->isrow; 2819 iscol = redund->iscol; 2820 matseq = redund->matseq; 2821 } 2822 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2823 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2824 2825 if (reuse == MAT_INITIAL_MATRIX) { 2826 /* create a supporting struct and attach it to C for reuse */ 2827 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2828 if (subsize == 1) { 2829 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2830 c->redundant = redund; 2831 } else { 2832 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2833 c->redundant = redund; 2834 } 2835 redund->isrow = isrow; 2836 redund->iscol = iscol; 2837 redund->matseq = matseq; 2838 redund->psubcomm = psubcomm; 2839 redund->Destroy = (*matredundant)->ops->destroy; 2840 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2841 } 2842 PetscFunctionReturn(0); 2843 } 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2847 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2848 { 2849 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2850 PetscErrorCode ierr; 2851 PetscInt i,*idxb = 0; 2852 PetscScalar *va,*vb; 2853 Vec vtmp; 2854 2855 PetscFunctionBegin; 2856 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2857 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2858 if (idx) { 2859 for (i=0; i<A->rmap->n; i++) { 2860 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2861 } 2862 } 2863 2864 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2865 if (idx) { 2866 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2867 } 2868 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2869 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2870 2871 for (i=0; i<A->rmap->n; i++) { 2872 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2873 va[i] = vb[i]; 2874 if (idx) idx[i] = a->garray[idxb[i]]; 2875 } 2876 } 2877 2878 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2879 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2880 ierr = PetscFree(idxb);CHKERRQ(ierr); 2881 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2887 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2888 { 2889 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2890 PetscErrorCode ierr; 2891 PetscInt i,*idxb = 0; 2892 PetscScalar *va,*vb; 2893 Vec vtmp; 2894 2895 PetscFunctionBegin; 2896 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2897 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2898 if (idx) { 2899 for (i=0; i<A->cmap->n; i++) { 2900 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2901 } 2902 } 2903 2904 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2905 if (idx) { 2906 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2907 } 2908 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2909 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2910 2911 for (i=0; i<A->rmap->n; i++) 
{ 2912 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2913 va[i] = vb[i]; 2914 if (idx) idx[i] = a->garray[idxb[i]]; 2915 } 2916 } 2917 2918 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2919 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2920 ierr = PetscFree(idxb);CHKERRQ(ierr); 2921 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2927 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2928 { 2929 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2930 PetscInt n = A->rmap->n; 2931 PetscInt cstart = A->cmap->rstart; 2932 PetscInt *cmap = mat->garray; 2933 PetscInt *diagIdx, *offdiagIdx; 2934 Vec diagV, offdiagV; 2935 PetscScalar *a, *diagA, *offdiagA; 2936 PetscInt r; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2941 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2942 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2943 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2944 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2946 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2947 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2948 for (r = 0; r < n; ++r) { 2949 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2950 a[r] = diagA[r]; 2951 idx[r] = cstart + diagIdx[r]; 2952 } else { 2953 a[r] = offdiagA[r]; 2954 idx[r] = cmap[offdiagIdx[r]]; 2955 } 2956 } 2957 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2958 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2959 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2960 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2961 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2962 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2968 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2969 { 2970 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2971 PetscInt n = A->rmap->n; 2972 PetscInt cstart = A->cmap->rstart; 2973 PetscInt *cmap = mat->garray; 2974 PetscInt *diagIdx, *offdiagIdx; 2975 Vec diagV, offdiagV; 2976 PetscScalar *a, *diagA, *offdiagA; 2977 PetscInt r; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2982 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2983 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2984 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2985 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2986 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2987 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2988 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2989 for (r = 0; r < n; ++r) { 2990 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2991 a[r] = diagA[r]; 2992 idx[r] = cstart + diagIdx[r]; 2993 } else { 2994 a[r] = offdiagA[r]; 2995 idx[r] = cmap[offdiagIdx[r]]; 2996 } 2997 } 2998 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2999 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3000 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3001 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3002 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3003 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3004 
PetscFunctionReturn(0); 3005 } 3006 3007 #undef __FUNCT__ 3008 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3009 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3010 { 3011 PetscErrorCode ierr; 3012 Mat *dummy; 3013 3014 PetscFunctionBegin; 3015 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3016 *newmat = *dummy; 3017 ierr = PetscFree(dummy);CHKERRQ(ierr); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3023 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3024 { 3025 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3035 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3036 { 3037 PetscErrorCode ierr; 3038 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3039 3040 PetscFunctionBegin; 3041 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3042 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3043 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3044 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 /* -------------------------------------------------------------------*/ 3049 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3050 MatGetRow_MPIAIJ, 3051 MatRestoreRow_MPIAIJ, 3052 MatMult_MPIAIJ, 3053 /* 4*/ MatMultAdd_MPIAIJ, 3054 MatMultTranspose_MPIAIJ, 3055 MatMultTransposeAdd_MPIAIJ, 3056 #if defined(PETSC_HAVE_PBGL) 3057 MatSolve_MPIAIJ, 3058 #else 3059 0, 3060 #endif 3061 0, 3062 0, 3063 /*10*/ 0, 3064 0, 3065 0, 3066 MatSOR_MPIAIJ, 3067 MatTranspose_MPIAIJ, 3068 /*15*/ MatGetInfo_MPIAIJ, 3069 MatEqual_MPIAIJ, 3070 MatGetDiagonal_MPIAIJ, 3071 MatDiagonalScale_MPIAIJ, 3072 MatNorm_MPIAIJ, 3073 /*20*/ MatAssemblyBegin_MPIAIJ, 3074 MatAssemblyEnd_MPIAIJ, 3075 MatSetOption_MPIAIJ, 3076 MatZeroEntries_MPIAIJ, 3077 /*24*/ MatZeroRows_MPIAIJ, 3078 0, 3079 #if defined(PETSC_HAVE_PBGL) 3080 0, 3081 #else 3082 0, 3083 #endif 3084 0, 3085 0, 3086 /*29*/ MatSetUp_MPIAIJ, 3087 #if defined(PETSC_HAVE_PBGL) 3088 0, 3089 #else 3090 0, 3091 #endif 3092 0, 3093 0, 3094 0, 3095 /*34*/ MatDuplicate_MPIAIJ, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*39*/ MatAXPY_MPIAIJ, 3101 MatGetSubMatrices_MPIAIJ, 3102 MatIncreaseOverlap_MPIAIJ, 3103 MatGetValues_MPIAIJ, 3104 MatCopy_MPIAIJ, 3105 /*44*/ MatGetRowMax_MPIAIJ, 3106 MatScale_MPIAIJ, 3107 0, 3108 0, 3109 MatZeroRowsColumns_MPIAIJ, 3110 /*49*/ MatSetRandom_MPIAIJ, 3111 0, 3112 0, 3113 0, 3114 0, 3115 /*54*/ MatFDColoringCreate_MPIXAIJ, 3116 0, 3117 MatSetUnfactored_MPIAIJ, 3118 MatPermute_MPIAIJ, 3119 0, 3120 /*59*/ MatGetSubMatrix_MPIAIJ, 3121 MatDestroy_MPIAIJ, 3122 MatView_MPIAIJ, 3123 0, 3124 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3125 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3126 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3127 0, 3128 0, 3129 0, 3130 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3131 MatGetRowMinAbs_MPIAIJ, 3132 0, 3133 MatSetColoring_MPIAIJ, 3134 0, 3135 MatSetValuesAdifor_MPIAIJ, 3136 /*75*/ MatFDColoringApply_AIJ, 3137 0, 3138 0, 3139 0, 3140 MatFindZeroDiagonals_MPIAIJ, 3141 /*80*/ 0, 3142 0, 3143 0, 3144 /*83*/ MatLoad_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3151 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3152 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3153 MatPtAP_MPIAIJ_MPIAIJ, 3154 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3155 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*99*/ 0, 3161 0, 3162 0, 3163 MatConjugate_MPIAIJ, 3164 0, 3165 /*104*/MatSetValuesRow_MPIAIJ, 3166 MatRealPart_MPIAIJ, 3167 MatImaginaryPart_MPIAIJ, 3168 0, 3169 0, 3170 /*109*/0, 3171 MatGetRedundantMatrix_MPIAIJ, 3172 MatGetRowMin_MPIAIJ, 3173 0, 3174 0, 3175 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3176 0, 3177 0, 3178 0, 3179 0, 3180 /*119*/0, 3181 0, 3182 0, 3183 0, 3184 MatGetMultiProcBlock_MPIAIJ, 3185 /*124*/MatFindNonzeroRows_MPIAIJ, 3186 MatGetColumnNorms_MPIAIJ, 3187 MatInvertBlockDiagonal_MPIAIJ, 3188 0, 3189 MatGetSubMatricesParallel_MPIAIJ, 3190 /*129*/0, 3191 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3192 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3193 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3194 0, 3195 /*134*/0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*139*/0, 3201 0, 3202 0, 3203 MatFDColoringSetUp_MPIXAIJ 3204 }; 3205 3206 /* ----------------------------------------------------------------------------------------*/ 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3210 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3211 { 3212 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3213 PetscErrorCode ierr; 3214 3215 PetscFunctionBegin; 3216 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3217 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3223 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3224 { 3225 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3226 PetscErrorCode ierr; 3227 3228 PetscFunctionBegin; 3229 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3230 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3231 PetscFunctionReturn(0); 3232 } 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3236 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3237 { 3238 Mat_MPIAIJ *b; 3239 PetscErrorCode ierr; 3240 3241 PetscFunctionBegin; 3242 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3244 b = (Mat_MPIAIJ*)B->data; 3245 3246 if (!B->preallocated) { 3247 /* Explicitly create 2 MATSEQAIJ matrices. 
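The first, b->A, holds the diagonal block and is sized by the local rows and local columns; the second, b->B, holds the off-diagonal block and is created here with the full global column width, its columns later being compressed to the ones actually referenced (recorded in garray) when the matrix is assembled.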
*/ 3248 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3249 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3250 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3251 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3253 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3254 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3255 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3256 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3258 } 3259 3260 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3261 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3262 B->preallocated = PETSC_TRUE; 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3268 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3269 { 3270 Mat mat; 3271 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 *newmat = 0; 3276 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3277 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3278 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3279 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3280 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3281 a = (Mat_MPIAIJ*)mat->data; 3282 3283 mat->factortype = matin->factortype; 3284 mat->assembled = PETSC_TRUE; 3285 mat->insertmode = NOT_SET_VALUES; 3286 mat->preallocated = PETSC_TRUE; 3287 3288 a->size = oldmat->size; 3289 a->rank = oldmat->rank; 3290 a->donotstash = oldmat->donotstash; 3291 a->roworiented = oldmat->roworiented; 3292 a->rowindices = 0; 3293 a->rowvalues = 0; 3294 a->getrowactive = PETSC_FALSE; 3295 3296 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3297 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3298 3299 if (oldmat->colmap) { 3300 #if defined(PETSC_USE_CTABLE) 3301 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3302 #else 3303 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3304 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3305 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3306 #endif 3307 } else a->colmap = 0; 3308 if (oldmat->garray) { 3309 PetscInt len; 3310 len = oldmat->B->cmap->n; 3311 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3312 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3313 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3314 } else a->garray = 0; 3315 3316 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3318 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3319 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3320 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3321 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3322 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3323 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3324 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3325 *newmat = mat; 3326 PetscFunctionReturn(0); 3327 } 3328 3329 3330 3331 #undef __FUNCT__ 3332 #define __FUNCT__ "MatLoad_MPIAIJ" 3333 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3334 { 3335 PetscScalar *vals,*svals; 3336 MPI_Comm comm; 3337 PetscErrorCode ierr; 3338 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3339 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3340 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3341 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3342 PetscInt cend,cstart,n,*rowners,sizesset=1; 3343 int fd; 3344 PetscInt bs = 1; 3345 3346 PetscFunctionBegin; 3347 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3348 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3349 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3350 if (!rank) { 3351 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3352 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3353 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3354 } 3355 3356 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3357 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3358 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3359 3360 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3361 3362 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3363 M = header[1]; N = header[2]; 3364 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3365 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3366 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3367 3368 /* If global sizes are set, check if they are consistent with that given in the file */ 3369 if (sizesset) { 3370 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3371 } 3372 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3373 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3374 3375 /* determine ownership of all (block) rows */ 3376 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3377 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3378 else m = newMat->rmap->n; /* Set by user */ 3379 3380 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3381 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3382 3383 /* First process needs enough room for process with most rows */ 3384 if (!rank) { 3385 mmax = rowners[1]; 3386 for (i=2; i<=size; i++) { 3387 mmax = PetscMax(mmax, rowners[i]); 3388 } 3389 } else mmax = -1; /* unused, but compilers complain */ 3390 3391 rowners[0] = 0; 3392 for (i=2; i<=size; i++) { 3393 rowners[i] += rowners[i-1]; 3394 } 3395 rstart = rowners[rank]; 3396 rend = rowners[rank+1]; 3397 3398 /* distribute row lengths to all processors */ 3399 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3400 if (!rank) { 3401 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3402 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3403 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3404 for (j=0; j<m; j++) { 3405 procsnz[0] += ourlens[j]; 3406 } 3407 for (i=1; i<size; i++) { 3408 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3409 /* calculate the number of nonzeros on each processor */ 3410 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3411 procsnz[i] += rowlengths[j]; 3412 } 3413 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3414 } 3415 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3416 } else { 3417 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3418 } 3419 3420 if (!rank) { 3421 /* determine max buffer needed and allocate it */ 3422 maxnz = 0; 3423 for (i=0; i<size; i++) { 3424 maxnz = PetscMax(maxnz,procsnz[i]); 3425 } 3426 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3427 3428 /* read in my part of the matrix column indices */ 3429 nz = procsnz[0]; 3430 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3431 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3432 3433 /* read in every one elses and ship off */ 3434 for (i=1; i<size; i++) { 3435 nz = procsnz[i]; 3436 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3437 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3438 } 3439 ierr = PetscFree(cols);CHKERRQ(ierr); 3440 } else { 3441 /* determine buffer space needed for message */ 3442 nz = 0; 3443 for (i=0; i<m; i++) { 3444 nz += ourlens[i]; 3445 } 3446 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3447 3448 /* receive message of column indices*/ 3449 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3450 } 3451 3452 /* determine column ownership if matrix is not square */ 3453 if (N != M) { 3454 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3455 else n = newMat->cmap->n; 3456 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3457 cstart = cend - n; 3458 } else { 3459 cstart = rstart; 3460 cend = rend; 3461 n = cend - cstart; 3462 } 3463 3464 /* loop over local rows, determining number of off diagonal entries */ 3465 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3466 jj = 0; 3467 for (i=0; i<m; i++) { 3468 for (j=0; j<ourlens[i]; j++) { 3469 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3470 jj++; 3471 } 3472 } 3473 3474 for (i=0; i<m; i++) { 3475 ourlens[i] -= offlens[i]; 3476 } 3477 if (!sizesset) { 3478 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3479 } 3480 3481 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3482 3483 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3484 3485 for (i=0; i<m; i++) { 3486 ourlens[i] += offlens[i]; 3487 } 3488 3489 if (!rank) { 3490 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3491 3492 /* read in my part of the matrix numerical values */ 3493 nz = procsnz[0]; 3494 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3495 3496 /* insert into matrix */ 3497 jj = rstart; 3498 smycols = mycols; 3499 svals = vals; 3500 for (i=0; i<m; i++) { 3501 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3502 smycols += ourlens[i]; 3503 svals += ourlens[i]; 3504 jj++; 3505 } 3506 3507 /* read in other processors and ship out */ 3508 for (i=1; i<size; i++) { 3509 nz = 
procsnz[i]; 3510 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3511 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3512 } 3513 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3514 } else { 3515 /* receive numeric values */ 3516 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3517 3518 /* receive message of values*/ 3519 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3520 3521 /* insert into matrix */ 3522 jj = rstart; 3523 smycols = mycols; 3524 svals = vals; 3525 for (i=0; i<m; i++) { 3526 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3527 smycols += ourlens[i]; 3528 svals += ourlens[i]; 3529 jj++; 3530 } 3531 } 3532 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3533 ierr = PetscFree(vals);CHKERRQ(ierr); 3534 ierr = PetscFree(mycols);CHKERRQ(ierr); 3535 ierr = PetscFree(rowners);CHKERRQ(ierr); 3536 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3537 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3538 PetscFunctionReturn(0); 3539 } 3540 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3543 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3544 { 3545 PetscErrorCode ierr; 3546 IS iscol_local; 3547 PetscInt csize; 3548 3549 PetscFunctionBegin; 3550 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3551 if (call == MAT_REUSE_MATRIX) { 3552 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3553 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3554 } else { 3555 PetscInt cbs; 3556 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3557 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3558 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3559 } 3560 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3561 if (call == MAT_INITIAL_MATRIX) { 3562 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3563 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3564 } 3565 PetscFunctionReturn(0); 3566 } 3567 3568 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3569 #undef __FUNCT__ 3570 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3571 /* 3572 Not great since it makes two copies of the submatrix, first an SeqAIJ 3573 in local and then by concatenating the local matrices the end result. 3574 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3575 3576 Note: This requires a sequential iscol with all indices. 
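Callers normally reach this routine through MatGetSubMatrix_MPIAIJ() above, which gathers the distributed iscol into such a sequential index set with ISAllGather() and caches it on the new matrix under the name "ISAllGather" so that MAT_REUSE_MATRIX calls can retrieve it.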
3577 */ 3578 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3579 { 3580 PetscErrorCode ierr; 3581 PetscMPIInt rank,size; 3582 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3583 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3584 PetscBool allcolumns, colflag; 3585 Mat M,Mreuse; 3586 MatScalar *vwork,*aa; 3587 MPI_Comm comm; 3588 Mat_SeqAIJ *aij; 3589 3590 PetscFunctionBegin; 3591 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3594 3595 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3596 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3597 if (colflag && ncol == mat->cmap->N) { 3598 allcolumns = PETSC_TRUE; 3599 } else { 3600 allcolumns = PETSC_FALSE; 3601 } 3602 if (call == MAT_REUSE_MATRIX) { 3603 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3604 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3605 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3606 } else { 3607 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3608 } 3609 3610 /* 3611 m - number of local rows 3612 n - number of columns (same on all processors) 3613 rstart - first row in new global matrix generated 3614 */ 3615 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3616 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3617 if (call == MAT_INITIAL_MATRIX) { 3618 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3619 ii = aij->i; 3620 jj = aij->j; 3621 3622 /* 3623 Determine the number of non-zeros in the diagonal and off-diagonal 3624 portions of the matrix in order to do correct preallocation 3625 */ 3626 3627 /* first get start and end of "diagonal" columns */ 3628 if (csize == PETSC_DECIDE) { 3629 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3630 if (mglobal == n) { /* square matrix */ 3631 nlocal = m; 3632 } else { 3633 nlocal = n/size + ((n % size) > rank); 3634 } 3635 } else { 3636 nlocal = csize; 3637 } 3638 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3639 rstart = rend - nlocal; 3640 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3641 3642 /* next, compute all the lengths */ 3643 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3644 olens = dlens + m; 3645 for (i=0; i<m; i++) { 3646 jend = ii[i+1] - ii[i]; 3647 olen = 0; 3648 dlen = 0; 3649 for (j=0; j<jend; j++) { 3650 if (*jj < rstart || *jj >= rend) olen++; 3651 else dlen++; 3652 jj++; 3653 } 3654 olens[i] = olen; 3655 dlens[i] = dlen; 3656 } 3657 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3658 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3659 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3660 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3661 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3662 ierr = PetscFree(dlens);CHKERRQ(ierr); 3663 } else { 3664 PetscInt ml,nl; 3665 3666 M = *newmat; 3667 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3668 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3669 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3670 /* 3671 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3672 rather than the slower MatSetValues(). 3673 */ 3674 M->was_assembled = PETSC_TRUE; 3675 M->assembled = PETSC_FALSE; 3676 } 3677 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3678 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3679 ii = aij->i; 3680 jj = aij->j; 3681 aa = aij->a; 3682 for (i=0; i<m; i++) { 3683 row = rstart + i; 3684 nz = ii[i+1] - ii[i]; 3685 cwork = jj; jj += nz; 3686 vwork = aa; aa += nz; 3687 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3688 } 3689 3690 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 *newmat = M; 3693 3694 /* save submatrix used in processor for next request */ 3695 if (call == MAT_INITIAL_MATRIX) { 3696 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3697 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3698 } 3699 PetscFunctionReturn(0); 3700 } 3701 3702 #undef __FUNCT__ 3703 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3704 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3705 { 3706 PetscInt m,cstart, cend,j,nnz,i,d; 3707 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3708 const PetscInt *JJ; 3709 PetscScalar *values; 3710 PetscErrorCode ierr; 3711 3712 PetscFunctionBegin; 3713 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3714 3715 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3716 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3717 m = B->rmap->n; 3718 cstart = B->cmap->rstart; 3719 cend = B->cmap->rend; 3720 rstart = B->rmap->rstart; 3721 3722 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3723 3724 #if defined(PETSC_USE_DEBUG) 3725 for (i=0; i<m; i++) { 3726 nnz = Ii[i+1]- Ii[i]; 3727 JJ = J + Ii[i]; 3728 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3729 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3730 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3731 } 3732 #endif 3733 3734 for (i=0; i<m; i++) { 3735 nnz = Ii[i+1]- Ii[i]; 3736 JJ = J + Ii[i]; 3737 nnz_max = PetscMax(nnz_max,nnz); 3738 d = 0; 3739 for (j=0; j<nnz; j++) { 3740 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3741 } 3742 d_nnz[i] = d; 3743 o_nnz[i] = nnz - d; 3744 } 3745 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3746 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3747 3748 if (v) values = (PetscScalar*)v; 3749 else { 3750 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3751 } 3752 3753 for (i=0; i<m; i++) { 3754 ii = i + rstart; 3755 nnz = Ii[i+1]- Ii[i]; 3756 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3757 } 3758 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3759 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 3761 if (!v) { 3762 ierr = PetscFree(values);CHKERRQ(ierr); 3763 } 3764 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3770 /*@ 3771 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3772 (the default parallel PETSc format). 3773 3774 Collective on MPI_Comm 3775 3776 Input Parameters: 3777 + B - the matrix 3778 . i - the indices into j for the start of each local row (starts with zero) 3779 . j - the column indices for each local row (starts with zero) 3780 - v - optional values in the matrix 3781 3782 Level: developer 3783 3784 Notes: 3785 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3786 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3787 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3788 3789 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3790 3791 The format which is used for the sparse matrix input, is equivalent to a 3792 row-major ordering.. i.e for the following matrix, the input data expected is 3793 as shown: 3794 3795 1 0 0 3796 2 0 3 P0 3797 ------- 3798 4 5 6 P1 3799 3800 Process0 [P0]: rows_owned=[0,1] 3801 i = {0,1,3} [size = nrow+1 = 2+1] 3802 j = {0,0,2} [size = nz = 6] 3803 v = {1,2,3} [size = nz = 6] 3804 3805 Process1 [P1]: rows_owned=[2] 3806 i = {0,3} [size = nrow+1 = 1+1] 3807 j = {0,1,2} [size = nz = 6] 3808 v = {4,5,6} [size = nz = 6] 3809 3810 .keywords: matrix, aij, compressed row, sparse, parallel 3811 3812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3813 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3814 @*/ 3815 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3816 { 3817 PetscErrorCode ierr; 3818 3819 PetscFunctionBegin; 3820 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #undef __FUNCT__ 3825 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3826 /*@C 3827 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3828 (the default parallel PETSc format). For good matrix assembly performance 3829 the user should preallocate the matrix storage by setting the parameters 3830 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3831 performance can be increased by more than a factor of 50. 3832 3833 Collective on MPI_Comm 3834 3835 Input Parameters: 3836 + A - the matrix 3837 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3838 (same value is used for all local rows) 3839 . d_nnz - array containing the number of nonzeros in the various rows of the 3840 DIAGONAL portion of the local submatrix (possibly different for each row) 3841 or NULL, if d_nz is used to specify the nonzero structure. 3842 The size of this array is equal to the number of local rows, i.e 'm'. 
3843 For matrices that will be factored, you must leave room for (and set) 3844 the diagonal entry even if it is zero. 3845 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3846 submatrix (same value is used for all local rows). 3847 - o_nnz - array containing the number of nonzeros in the various rows of the 3848 OFF-DIAGONAL portion of the local submatrix (possibly different for 3849 each row) or NULL, if o_nz is used to specify the nonzero 3850 structure. The size of this array is equal to the number 3851 of local rows, i.e 'm'. 3852 3853 If the *_nnz parameter is given then the *_nz parameter is ignored 3854 3855 The AIJ format (also called the Yale sparse matrix format or 3856 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3857 storage. The stored row and column indices begin with zero. 3858 See Users-Manual: ch_mat for details. 3859 3860 The parallel matrix is partitioned such that the first m0 rows belong to 3861 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3862 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3863 3864 The DIAGONAL portion of the local submatrix of a processor can be defined 3865 as the submatrix which is obtained by extraction the part corresponding to 3866 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3867 first row that belongs to the processor, r2 is the last row belonging to 3868 the this processor, and c1-c2 is range of indices of the local part of a 3869 vector suitable for applying the matrix to. This is an mxn matrix. In the 3870 common case of a square matrix, the row and column ranges are the same and 3871 the DIAGONAL part is also square. The remaining portion of the local 3872 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3873 3874 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3875 3876 You can call MatGetInfo() to get information on how effective the preallocation was; 3877 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3878 You can also run with the option -info and look for messages with the string 3879 malloc in them to see if additional memory allocation was needed. 3880 3881 Example usage: 3882 3883 Consider the following 8x8 matrix with 34 non-zero values, that is 3884 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3885 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3886 as follows: 3887 3888 .vb 3889 1 2 0 | 0 3 0 | 0 4 3890 Proc0 0 5 6 | 7 0 0 | 8 0 3891 9 0 10 | 11 0 0 | 12 0 3892 ------------------------------------- 3893 13 0 14 | 15 16 17 | 0 0 3894 Proc1 0 18 0 | 19 20 21 | 0 0 3895 0 0 0 | 22 23 0 | 24 0 3896 ------------------------------------- 3897 Proc2 25 26 27 | 0 0 28 | 29 0 3898 30 0 0 | 31 32 33 | 0 34 3899 .ve 3900 3901 This can be represented as a collection of submatrices as: 3902 3903 .vb 3904 A B C 3905 D E F 3906 G H I 3907 .ve 3908 3909 Where the submatrices A,B,C are owned by proc0, D,E,F are 3910 owned by proc1, G,H,I are owned by proc2. 3911 3912 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3913 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3914 The 'M','N' parameters are 8,8, and have the same values on all procs. 3915 3916 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3917 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3918 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3919 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3920 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3921 matrix, ans [DF] as another SeqAIJ matrix. 3922 3923 When d_nz, o_nz parameters are specified, d_nz storage elements are 3924 allocated for every row of the local diagonal submatrix, and o_nz 3925 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3926 One way to choose d_nz and o_nz is to use the max nonzerors per local 3927 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3928 In this case, the values of d_nz,o_nz are: 3929 .vb 3930 proc0 : dnz = 2, o_nz = 2 3931 proc1 : dnz = 3, o_nz = 2 3932 proc2 : dnz = 1, o_nz = 4 3933 .ve 3934 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3935 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3936 for proc3. i.e we are using 12+15+10=37 storage locations to store 3937 34 values. 3938 3939 When d_nnz, o_nnz parameters are specified, the storage is specified 3940 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3941 In the above case the values for d_nnz,o_nnz are: 3942 .vb 3943 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3944 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3945 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3946 .ve 3947 Here the space allocated is sum of all the above values i.e 34, and 3948 hence pre-allocation is perfect. 3949 3950 Level: intermediate 3951 3952 .keywords: matrix, aij, compressed row, sparse, parallel 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3955 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3963 PetscValidType(B,1); 3964 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3965 PetscFunctionReturn(0); 3966 } 3967 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3970 /*@ 3971 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3972 CSR format the local rows. 3973 3974 Collective on MPI_Comm 3975 3976 Input Parameters: 3977 + comm - MPI communicator 3978 . m - number of local rows (Cannot be PETSC_DECIDE) 3979 . n - This value should be the same as the local size used in creating the 3980 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3981 calculated if N is given) For square matrices n is almost always m. 3982 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3983 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3984 . i - row indices 3985 . j - column indices 3986 - a - matrix values 3987 3988 Output Parameter: 3989 . mat - the matrix 3990 3991 Level: intermediate 3992 3993 Notes: 3994 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3995 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3996 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3997 3998 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
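As a sketch (the variable and communicator names are illustrative), process 0 of the two-process layout shown further below would pass its two local rows as

.vb
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve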
3999 4000 The format which is used for the sparse matrix input, is equivalent to a 4001 row-major ordering.. i.e for the following matrix, the input data expected is 4002 as shown: 4003 4004 1 0 0 4005 2 0 3 P0 4006 ------- 4007 4 5 6 P1 4008 4009 Process0 [P0]: rows_owned=[0,1] 4010 i = {0,1,3} [size = nrow+1 = 2+1] 4011 j = {0,0,2} [size = nz = 6] 4012 v = {1,2,3} [size = nz = 6] 4013 4014 Process1 [P1]: rows_owned=[2] 4015 i = {0,3} [size = nrow+1 = 1+1] 4016 j = {0,1,2} [size = nz = 6] 4017 v = {4,5,6} [size = nz = 6] 4018 4019 .keywords: matrix, aij, compressed row, sparse, parallel 4020 4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4022 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4023 @*/ 4024 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4025 { 4026 PetscErrorCode ierr; 4027 4028 PetscFunctionBegin; 4029 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4030 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4031 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4032 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4033 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4034 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4035 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4036 PetscFunctionReturn(0); 4037 } 4038 4039 #undef __FUNCT__ 4040 #define __FUNCT__ "MatCreateAIJ" 4041 /*@C 4042 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4043 (the default parallel PETSc format). For good matrix assembly performance 4044 the user should preallocate the matrix storage by setting the parameters 4045 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4046 performance can be increased by more than a factor of 50. 4047 4048 Collective on MPI_Comm 4049 4050 Input Parameters: 4051 + comm - MPI communicator 4052 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4053 This value should be the same as the local size used in creating the 4054 y vector for the matrix-vector product y = Ax. 4055 . n - This value should be the same as the local size used in creating the 4056 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4057 calculated if N is given) For square matrices n is almost always m. 4058 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4059 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4060 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4061 (same value is used for all local rows) 4062 . d_nnz - array containing the number of nonzeros in the various rows of the 4063 DIAGONAL portion of the local submatrix (possibly different for each row) 4064 or NULL, if d_nz is used to specify the nonzero structure. 4065 The size of this array is equal to the number of local rows, i.e 'm'. 4066 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4067 submatrix (same value is used for all local rows). 4068 - o_nnz - array containing the number of nonzeros in the various rows of the 4069 OFF-DIAGONAL portion of the local submatrix (possibly different for 4070 each row) or NULL, if o_nz is used to specify the nonzero 4071 structure. 
The size of this array is equal to the number 4072 of local rows, i.e 'm'. 4073 4074 Output Parameter: 4075 . A - the matrix 4076 4077 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4078 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4079 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4080 4081 Notes: 4082 If the *_nnz parameter is given then the *_nz parameter is ignored 4083 4084 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4085 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4086 storage requirements for this matrix. 4087 4088 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4089 processor than it must be used on all processors that share the object for 4090 that argument. 4091 4092 The user MUST specify either the local or global matrix dimensions 4093 (possibly both). 4094 4095 The parallel matrix is partitioned across processors such that the 4096 first m0 rows belong to process 0, the next m1 rows belong to 4097 process 1, the next m2 rows belong to process 2 etc.. where 4098 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4099 values corresponding to [m x N] submatrix. 4100 4101 The columns are logically partitioned with the n0 columns belonging 4102 to 0th partition, the next n1 columns belonging to the next 4103 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4104 4105 The DIAGONAL portion of the local submatrix on any given processor 4106 is the submatrix corresponding to the rows and columns m,n 4107 corresponding to the given processor. i.e diagonal matrix on 4108 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4109 etc. The remaining portion of the local submatrix [m x (N-n)] 4110 constitute the OFF-DIAGONAL portion. The example below better 4111 illustrates this concept. 4112 4113 For a square global matrix we define each processor's diagonal portion 4114 to be its local rows and the corresponding columns (a square submatrix); 4115 each processor's off-diagonal portion encompasses the remainder of the 4116 local matrix (a rectangular submatrix). 4117 4118 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4119 4120 When calling this routine with a single process communicator, a matrix of 4121 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4122 type of communicator, use the construction mechanism: 4123 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4124 4125 By default, this format uses inodes (identical nodes) when possible. 4126 We search for consecutive rows with the same nonzero structure, thereby 4127 reusing matrix information to achieve increased efficiency. 4128 4129 Options Database Keys: 4130 + -mat_no_inode - Do not use inodes 4131 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4132 - -mat_aij_oneindex - Internally use indexing starting at 1 4133 rather than 0. Note that when calling MatSetValues(), 4134 the user still MUST index entries starting at 0! 4135 4136 4137 Example usage: 4138 4139 Consider the following 8x8 matrix with 34 non-zero values, that is 4140 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4141 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4142 as follows: 4143 4144 .vb 4145 1 2 0 | 0 3 0 | 0 4 4146 Proc0 0 5 6 | 7 0 0 | 8 0 4147 9 0 10 | 11 0 0 | 12 0 4148 ------------------------------------- 4149 13 0 14 | 15 16 17 | 0 0 4150 Proc1 0 18 0 | 19 20 21 | 0 0 4151 0 0 0 | 22 23 0 | 24 0 4152 ------------------------------------- 4153 Proc2 25 26 27 | 0 0 28 | 29 0 4154 30 0 0 | 31 32 33 | 0 34 4155 .ve 4156 4157 This can be represented as a collection of submatrices as: 4158 4159 .vb 4160 A B C 4161 D E F 4162 G H I 4163 .ve 4164 4165 Where the submatrices A,B,C are owned by proc0, D,E,F are 4166 owned by proc1, G,H,I are owned by proc2. 4167 4168 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4169 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4170 The 'M','N' parameters are 8,8, and have the same values on all procs. 4171 4172 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4173 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4174 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4175 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4176 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4177 matrix, ans [DF] as another SeqAIJ matrix. 4178 4179 When d_nz, o_nz parameters are specified, d_nz storage elements are 4180 allocated for every row of the local diagonal submatrix, and o_nz 4181 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4182 One way to choose d_nz and o_nz is to use the max nonzerors per local 4183 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4184 In this case, the values of d_nz,o_nz are: 4185 .vb 4186 proc0 : dnz = 2, o_nz = 2 4187 proc1 : dnz = 3, o_nz = 2 4188 proc2 : dnz = 1, o_nz = 4 4189 .ve 4190 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4191 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4192 for proc3. i.e we are using 12+15+10=37 storage locations to store 4193 34 values. 4194 4195 When d_nnz, o_nnz parameters are specified, the storage is specified 4196 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4197 In the above case the values for d_nnz,o_nnz are: 4198 .vb 4199 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4200 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4201 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4202 .ve 4203 Here the space allocated is sum of all the above values i.e 34, and 4204 hence pre-allocation is perfect. 
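A minimal sketch of the creation sequence recommended above (the names A, d_nnz and o_nnz are illustrative):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
.ve

Since both preallocation calls go through PetscTryMethod(), the one that does not match the actual type of A is simply a no-op.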
4205 4206 Level: intermediate 4207 4208 .keywords: matrix, aij, compressed row, sparse, parallel 4209 4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4211 MPIAIJ, MatCreateMPIAIJWithArrays() 4212 @*/ 4213 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4214 { 4215 PetscErrorCode ierr; 4216 PetscMPIInt size; 4217 4218 PetscFunctionBegin; 4219 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4220 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4221 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4222 if (size > 1) { 4223 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4224 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4225 } else { 4226 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4227 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4228 } 4229 PetscFunctionReturn(0); 4230 } 4231 4232 #undef __FUNCT__ 4233 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4234 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4235 { 4236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4237 4238 PetscFunctionBegin; 4239 *Ad = a->A; 4240 *Ao = a->B; 4241 *colmap = a->garray; 4242 PetscFunctionReturn(0); 4243 } 4244 4245 #undef __FUNCT__ 4246 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4247 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4248 { 4249 PetscErrorCode ierr; 4250 PetscInt i; 4251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4252 4253 PetscFunctionBegin; 4254 if (coloring->ctype == IS_COLORING_GLOBAL) { 4255 ISColoringValue *allcolors,*colors; 4256 ISColoring ocoloring; 4257 4258 /* set coloring for diagonal portion */ 4259 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4260 4261 /* set coloring for off-diagonal portion */ 4262 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4263 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4264 for (i=0; i<a->B->cmap->n; i++) { 4265 colors[i] = allcolors[a->garray[i]]; 4266 } 4267 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4268 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4269 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4270 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4271 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4272 ISColoringValue *colors; 4273 PetscInt *larray; 4274 ISColoring ocoloring; 4275 4276 /* set coloring for diagonal portion */ 4277 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4278 for (i=0; i<a->A->cmap->n; i++) { 4279 larray[i] = i + A->cmap->rstart; 4280 } 4281 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4282 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4283 for (i=0; i<a->A->cmap->n; i++) { 4284 colors[i] = coloring->colors[larray[i]]; 4285 } 4286 ierr = PetscFree(larray);CHKERRQ(ierr); 4287 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4288 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4289 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4290 4291 /* set coloring for off-diagonal portion */ 4292 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4293 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4294 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4295 for (i=0; i<a->B->cmap->n; i++) { 4296 colors[i] = coloring->colors[larray[i]]; 4297 } 4298 ierr = PetscFree(larray);CHKERRQ(ierr); 4299 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4300 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4301 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4302 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #undef __FUNCT__ 4307 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4308 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4309 { 4310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4311 PetscErrorCode ierr; 4312 4313 PetscFunctionBegin; 4314 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4315 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4316 PetscFunctionReturn(0); 4317 } 4318 4319 #undef __FUNCT__ 4320 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4321 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4322 { 4323 PetscErrorCode ierr; 4324 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4325 PetscInt *indx; 4326 4327 PetscFunctionBegin; 4328 /* This routine will ONLY return MPIAIJ type matrix */ 4329 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4330 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4331 if (n == PETSC_DECIDE) { 4332 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4333 } 4334 /* Check sum(n) = N */ 4335 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4336 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4337 4338 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4339 rstart -= m; 4340 4341 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4342 for (i=0; i<m; i++) { 4343 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4344 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4345 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4346 } 4347 4348 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4349 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4350 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4351 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4352 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4353 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4354 PetscFunctionReturn(0); 4355 } 4356 4357 #undef __FUNCT__ 4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4359 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4360 { 4361 PetscErrorCode ierr; 4362 PetscInt m,N,i,rstart,nnz,Ii; 4363 PetscInt *indx; 4364 PetscScalar *values; 4365 4366 PetscFunctionBegin; 4367 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4368 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4369 for (i=0; i<m; i++) { 4370 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4371 Ii = i + rstart; 4372 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4373 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4374 } 4375 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4376 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 #undef __FUNCT__ 4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4382 /*@ 4383 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4384 matrices from each processor 4385 4386 Collective on MPI_Comm 4387 4388 Input Parameters: 4389 + comm - the communicators the parallel matrix will live on 4390 . inmat - the input sequential matrices 4391 . n - number of local columns (or PETSC_DECIDE) 4392 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4393 4394 Output Parameter: 4395 . outmat - the parallel matrix generated 4396 4397 Level: advanced 4398 4399 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4400 4401 @*/ 4402 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4403 { 4404 PetscErrorCode ierr; 4405 PetscMPIInt size; 4406 4407 PetscFunctionBegin; 4408 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4409 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4410 if (size == 1) { 4411 if (scall == MAT_INITIAL_MATRIX) { 4412 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4413 } else { 4414 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4415 } 4416 } else { 4417 if (scall == MAT_INITIAL_MATRIX) { 4418 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4419 } 4420 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4421 } 4422 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4423 PetscFunctionReturn(0); 4424 } 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "MatFileSplit" 4428 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4429 { 4430 PetscErrorCode ierr; 4431 PetscMPIInt rank; 4432 PetscInt m,N,i,rstart,nnz; 4433 size_t len; 4434 const PetscInt *indx; 4435 PetscViewer out; 4436 char *name; 4437 Mat B; 4438 const PetscScalar *values; 4439 4440 PetscFunctionBegin; 4441 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4442 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4443 /* Should this be the type of the diagonal block of A? 
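As written, the per-process piece is always created as MATSEQAIJ just below, whatever the type of A's diagonal block.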
*/ 4444 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4445 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4446 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4447 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4448 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4449 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4450 for (i=0; i<m; i++) { 4451 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4452 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4453 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4454 } 4455 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4456 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4457 4458 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4459 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4460 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4461 sprintf(name,"%s.%d",outfile,rank); 4462 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4463 ierr = PetscFree(name);CHKERRQ(ierr); 4464 ierr = MatView(B,out);CHKERRQ(ierr); 4465 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4466 ierr = MatDestroy(&B);CHKERRQ(ierr); 4467 PetscFunctionReturn(0); 4468 } 4469 4470 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4471 #undef __FUNCT__ 4472 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4473 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4474 { 4475 PetscErrorCode ierr; 4476 Mat_Merge_SeqsToMPI *merge; 4477 PetscContainer container; 4478 4479 PetscFunctionBegin; 4480 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4481 if (container) { 4482 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4495 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4496 ierr = PetscFree(merge);CHKERRQ(ierr); 4497 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4498 } 4499 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4500 PetscFunctionReturn(0); 4501 } 4502 4503 #include <../src/mat/utils/freespace.h> 4504 #include <petscbt.h> 4505 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4508 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4509 { 4510 PetscErrorCode ierr; 4511 MPI_Comm comm; 4512 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4513 PetscMPIInt size,rank,taga,*len_s; 4514 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4515 PetscInt proc,m; 4516 PetscInt **buf_ri,**buf_rj; 4517 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4518 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4519 MPI_Request *s_waits,*r_waits; 4520 MPI_Status *status; 4521 MatScalar *aa=a->a; 4522 MatScalar **abuf_r,*ba_i; 4523 Mat_Merge_SeqsToMPI *merge; 4524 PetscContainer container; 4525 4526 PetscFunctionBegin; 4527 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4528 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4529 4530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4532 4533 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4534 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4535 4536 bi = merge->bi; 4537 bj = merge->bj; 4538 buf_ri = merge->buf_ri; 4539 buf_rj = merge->buf_rj; 4540 4541 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4542 owners = merge->rowmap->range; 4543 len_s = merge->len_s; 4544 4545 /* send and recv matrix values */ 4546 /*-----------------------------*/ 4547 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4548 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4549 4550 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4551 for (proc=0,k=0; proc<size; proc++) { 4552 if (!len_s[proc]) continue; 4553 i = owners[proc]; 4554 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4555 k++; 4556 } 4557 4558 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4559 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4560 ierr = PetscFree(status);CHKERRQ(ierr); 4561 4562 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4563 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4564 4565 /* insert mat values of mpimat */ 4566 /*----------------------------*/ 4567 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4568 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4569 4570 for (k=0; k<merge->nrecv; k++) { 4571 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4572 nrows = *(buf_ri_k[k]); 4573 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4574 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4575 } 4576 4577 /* set values of ba */ 4578 m = merge->rowmap->n; 4579 for (i=0; i<m; i++) { 4580 arow = owners[rank] + i; 4581 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4582 bnzi = bi[i+1] - bi[i]; 4583 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4584 4585 /* add local non-zero vals of this proc's seqmat into ba */ 4586 anzi = ai[arow+1] - ai[arow]; 4587 aj = a->j + ai[arow]; 4588 aa = a->a + ai[arow]; 4589 nextaj = 0; 4590 for (j=0; nextaj<anzi; j++) { 4591 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4592 ba_i[j] += aa[nextaj++]; 4593 } 4594 } 4595 4596 /* add received vals into ba */ 4597 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4598 /* i-th row */ 4599 if (i == *nextrow[k]) { 4600 anzi = *(nextai[k]+1) - *nextai[k]; 4601 aj = buf_rj[k] + *(nextai[k]); 4602 aa = abuf_r[k] + *(nextai[k]); 4603 nextaj = 0; 4604 for (j=0; nextaj<anzi; j++) { 4605 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4606 ba_i[j] += aa[nextaj++]; 4607 } 4608 } 4609 nextrow[k]++; nextai[k]++; 4610 } 4611 } 4612 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4613 } 4614 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4615 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 4617 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4618 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4619 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4620 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4621 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4622 PetscFunctionReturn(0); 4623 } 4624 4625 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4626 4627 #undef __FUNCT__ 4628 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4629 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4630 { 4631 PetscErrorCode ierr; 4632 Mat B_mpi; 4633 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4634 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4635 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4636 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4637 PetscInt len,proc,*dnz,*onz,bs,cbs; 4638 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4639 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4640 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4641 MPI_Status *status; 4642 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4643 PetscBT lnkbt; 4644 Mat_Merge_SeqsToMPI *merge; 4645 PetscContainer container; 4646 4647 PetscFunctionBegin; 4648 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4649 4650 /* make sure it is a PETSc comm */ 4651 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4652 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4653 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4654 4655 ierr = PetscNew(&merge);CHKERRQ(ierr); 4656 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4657 4658 /* determine row ownership */ 4659 /*---------------------------------------------------------*/ 4660 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4661 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4665 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4666 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4667 4668 m = merge->rowmap->n; 4669 owners = merge->rowmap->range; 4670 4671 /* determine the number of messages to send, their lengths */ 4672 /*---------------------------------------------------------*/ 4673 len_s = merge->len_s; 4674 4675 len = 0; /* length of buf_si[] */ 4676 merge->nsend = 0; 4677 for (proc=0; proc<size; proc++) { 4678 len_si[proc] = 0; 4679 if (proc == rank) { 4680 len_s[proc] = 0; 4681 } else { 4682 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4683 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4684 } 4685 if (len_s[proc]) { 4686 merge->nsend++; 4687 nrows = 0; 4688 for (i=owners[proc]; i<owners[proc+1]; i++) { 4689 if (ai[i+1] > ai[i]) nrows++; 4690 } 4691 len_si[proc] = 2*(nrows+1); 4692 len += len_si[proc]; 4693 } 4694 } 4695 4696 /* determine the number and length of messages to receive for ij-structure */ 4697 /*-------------------------------------------------------------------------*/ 4698 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4699 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4700 4701 /* post the Irecv of j-structure */ 4702 /*-------------------------------*/ 4703 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4704 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4705 4706 /* post the Isend of j-structure */ 4707 /*--------------------------------*/ 4708 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4709 4710 for (proc=0, k=0; proc<size; proc++) { 4711 if (!len_s[proc]) continue; 4712 i = owners[proc]; 4713 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4714 k++; 4715 } 4716 4717 /* receives and sends of j-structure are complete */ 4718 /*------------------------------------------------*/ 4719 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4720 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4721 4722 /* send and recv i-structure */ 4723 /*---------------------------*/ 4724 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4725 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4726 4727 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4728 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4729 for (proc=0,k=0; proc<size; proc++) { 4730 if (!len_s[proc]) continue; 4731 /* form outgoing message for i-structure: 4732 buf_si[0]: nrows to be sent 4733 [1:nrows]: row index (global) 4734 [nrows+1:2*nrows+1]: i-structure index 4735 */ 4736 /*-------------------------------------------*/ 4737 nrows = len_si[proc]/2 - 1; 4738 buf_si_i = buf_si + nrows+1; 4739 buf_si[0] = nrows; 4740 buf_si_i[0] = 0; 4741 nrows = 0; 4742 for (i=owners[proc]; i<owners[proc+1]; i++) { 4743 anzi = ai[i+1] - ai[i]; 4744 if (anzi) { 4745 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4746 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4747 nrows++; 4748 } 4749 } 4750 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4751 k++; 4752 buf_si += len_si[proc]; 4753 } 4754 4755 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4756 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4757 4758 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4759 for (i=0; i<merge->nrecv; i++) { 4760 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4761 } 4762 4763 ierr = PetscFree(len_si);CHKERRQ(ierr); 4764 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4765 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4766 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4767 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4768 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4769 ierr = PetscFree(status);CHKERRQ(ierr); 4770 4771 /* compute a local seq matrix in each processor */ 4772 /*----------------------------------------------*/ 4773 /* allocate bi array and free space for accumulating nonzero column info */ 4774 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4775 bi[0] = 0; 4776 4777 /* create and initialize a linked list */ 4778 nlnk = N+1; 4779 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4780 4781 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4782 len = ai[owners[rank+1]] - ai[owners[rank]]; 4783 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4784 4785 current_space = free_space; 4786 4787 /* determine symbolic info for each local row */ 4788 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4789 4790 for (k=0; k<merge->nrecv; k++) { 4791 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4792 nrows = *buf_ri_k[k]; 4793 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4794 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4795 } 4796 4797 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4798 len = 0; 4799 for (i=0; i<m; i++) { 4800 bnzi = 0; 4801 /* add local non-zero cols of this proc's seqmat into lnk */ 4802 arow = owners[rank] + i; 4803 anzi = ai[arow+1] - ai[arow]; 4804 aj = a->j + ai[arow]; 4805 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4806 bnzi += nlnk; 4807 /* add received col data into lnk */ 4808 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4809 if (i == *nextrow[k]) { /* i-th row */ 4810 anzi = *(nextai[k]+1) - *nextai[k]; 4811 aj = buf_rj[k] + *nextai[k]; 4812 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4813 bnzi += nlnk; 4814 nextrow[k]++; nextai[k]++; 4815 } 4816 } 4817 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4818 4819 /* if free space is not available, make more free space */ 4820 if (current_space->local_remaining<bnzi) { 4821 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4822 nspacedouble++; 4823 } 4824 /* copy data into free space, then initialize lnk */ 4825 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4826 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4827 4828 current_space->array += bnzi; 4829 current_space->local_used += bnzi; 4830 current_space->local_remaining -= bnzi; 4831 4832 bi[i+1] = bi[i] + bnzi; 4833 } 4834 4835 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4836 4837 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4838 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4839 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4840 4841 /* create symbolic parallel matrix B_mpi */ 4842 /*---------------------------------------*/ 4843 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4844 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4845 if (n==PETSC_DECIDE) { 4846 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4847 } else { 4848 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4849 } 4850 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4851 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4852 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4853 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4854 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4855 4856 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4857 B_mpi->assembled = PETSC_FALSE; 4858 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4859 merge->bi = bi; 4860 merge->bj = bj; 4861 merge->buf_ri = buf_ri; 4862 merge->buf_rj = buf_rj; 4863 merge->coi = NULL; 4864 merge->coj = NULL; 4865 merge->owners_co = NULL; 4866 4867 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4868 4869 /* attach the supporting struct to B_mpi for reuse */ 4870 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4871 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4872 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4874 *mpimat = B_mpi; 4875 4876 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4877 PetscFunctionReturn(0); 4878 } 4879 4880 #undef __FUNCT__ 4881 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4882 /*@C 4883 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4884 matrices from each processor 4885 4886 Collective on MPI_Comm 4887 4888 Input Parameters: 4889 + comm - the communicators the parallel matrix will live on 4890 . seqmat - the input sequential matrices 4891 . m - number of local rows (or PETSC_DECIDE) 4892 . n - number of local columns (or PETSC_DECIDE) 4893 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4894 4895 Output Parameter: 4896 . mpimat - the parallel matrix generated 4897 4898 Level: advanced 4899 4900 Notes: 4901 The dimensions of the sequential matrix in each processor MUST be the same. 4902 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4903 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4904 @*/ 4905 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4906 { 4907 PetscErrorCode ierr; 4908 PetscMPIInt size; 4909 4910 PetscFunctionBegin; 4911 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4912 if (size == 1) { 4913 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4914 if (scall == MAT_INITIAL_MATRIX) { 4915 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4916 } else { 4917 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4918 } 4919 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 PetscFunctionReturn(0); 4921 } 4922 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4923 if (scall == MAT_INITIAL_MATRIX) { 4924 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4925 } 4926 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4927 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4928 PetscFunctionReturn(0); 4929 } 4930 4931 #undef __FUNCT__ 4932 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4933 /*@ 4934 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4935 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4936 with MatGetSize() 4937 4938 Not Collective 4939 4940 Input Parameters: 4941 + A - the matrix 4942 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4943 4944 Output Parameter: 4945 . 
A_loc - the local sequential matrix generated 4946 4947 Level: developer 4948 4949 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4950 4951 @*/ 4952 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4953 { 4954 PetscErrorCode ierr; 4955 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4956 Mat_SeqAIJ *mat,*a,*b; 4957 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4958 MatScalar *aa,*ba,*cam; 4959 PetscScalar *ca; 4960 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4961 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4962 PetscBool match; 4963 4964 PetscFunctionBegin; 4965 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4966 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4967 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4968 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4969 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4970 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4971 aa = a->a; ba = b->a; 4972 if (scall == MAT_INITIAL_MATRIX) { 4973 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4974 ci[0] = 0; 4975 for (i=0; i<am; i++) { 4976 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4977 } 4978 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4979 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4980 k = 0; 4981 for (i=0; i<am; i++) { 4982 ncols_o = bi[i+1] - bi[i]; 4983 ncols_d = ai[i+1] - ai[i]; 4984 /* off-diagonal portion of A */ 4985 for (jo=0; jo<ncols_o; jo++) { 4986 col = cmap[*bj]; 4987 if (col >= cstart) break; 4988 cj[k] = col; bj++; 4989 ca[k++] = *ba++; 4990 } 4991 /* diagonal portion of A */ 4992 for (j=0; j<ncols_d; j++) { 4993 cj[k] = cstart + *aj++; 4994 ca[k++] = *aa++; 4995 } 4996 /* off-diagonal portion of A */ 4997 for (j=jo; j<ncols_o; j++) { 4998 cj[k] = cmap[*bj++]; 4999 ca[k++] = *ba++; 5000 } 5001 } 5002 /* put together the new matrix */ 5003 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5004 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5005 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5006 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5007 mat->free_a = PETSC_TRUE; 5008 mat->free_ij = PETSC_TRUE; 5009 mat->nonew = 0; 5010 } else if (scall == MAT_REUSE_MATRIX) { 5011 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5012 ci = mat->i; cj = mat->j; cam = mat->a; 5013 for (i=0; i<am; i++) { 5014 /* off-diagonal portion of A */ 5015 ncols_o = bi[i+1] - bi[i]; 5016 for (jo=0; jo<ncols_o; jo++) { 5017 col = cmap[*bj]; 5018 if (col >= cstart) break; 5019 *cam++ = *ba++; bj++; 5020 } 5021 /* diagonal portion of A */ 5022 ncols_d = ai[i+1] - ai[i]; 5023 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5024 /* off-diagonal portion of A */ 5025 for (j=jo; j<ncols_o; j++) { 5026 *cam++ = *ba++; bj++; 5027 } 5028 } 5029 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5030 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5031 PetscFunctionReturn(0); 5032 } 5033 5034 #undef __FUNCT__ 5035 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5036 /*@C 5037 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5038 5039 Not Collective 5040 5041 Input Parameters: 5042 + A - the matrix 5043 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5044 - row, col - index sets of rows and columns to extract (or NULL) 5045 5046 Output Parameter: 5047 . 
A_loc - the local sequential matrix generated 5048 5049 Level: developer 5050 5051 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5052 5053 @*/ 5054 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5055 { 5056 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5057 PetscErrorCode ierr; 5058 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5059 IS isrowa,iscola; 5060 Mat *aloc; 5061 PetscBool match; 5062 5063 PetscFunctionBegin; 5064 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5065 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5066 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5067 if (!row) { 5068 start = A->rmap->rstart; end = A->rmap->rend; 5069 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5070 } else { 5071 isrowa = *row; 5072 } 5073 if (!col) { 5074 start = A->cmap->rstart; 5075 cmap = a->garray; 5076 nzA = a->A->cmap->n; 5077 nzB = a->B->cmap->n; 5078 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5079 ncols = 0; 5080 for (i=0; i<nzB; i++) { 5081 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5082 else break; 5083 } 5084 imark = i; 5085 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5086 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5087 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5088 } else { 5089 iscola = *col; 5090 } 5091 if (scall != MAT_INITIAL_MATRIX) { 5092 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5093 aloc[0] = *A_loc; 5094 } 5095 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5096 *A_loc = aloc[0]; 5097 ierr = PetscFree(aloc);CHKERRQ(ierr); 5098 if (!row) { 5099 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5100 } 5101 if (!col) { 5102 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5103 } 5104 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 #undef __FUNCT__ 5109 #define __FUNCT__ "MatGetBrowsOfAcols" 5110 /*@C 5111 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5112 5113 Collective on Mat 5114 5115 Input Parameters: 5116 + A,B - the matrices in mpiaij format 5117 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5118 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5119 5120 Output Parameter: 5121 + rowb, colb - index sets of rows and columns of B to extract 5122 - B_seq - the sequential matrix generated 5123 5124 Level: developer 5125 5126 @*/ 5127 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5128 { 5129 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5130 PetscErrorCode ierr; 5131 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5132 IS isrowb,iscolb; 5133 Mat *bseq=NULL; 5134 5135 PetscFunctionBegin; 5136 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5137 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5138 } 5139 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5140 5141 if (scall == MAT_INITIAL_MATRIX) { 5142 start = A->cmap->rstart; 5143 cmap = a->garray; 5144 nzA = a->A->cmap->n; 5145 nzB = a->B->cmap->n; 5146 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5147 ncols = 0; 5148 for (i=0; i<nzB; i++) { /* row < local row index */ 5149 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5150 else break; 5151 } 5152 imark = i; 5153 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5154 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5155 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5156 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5157 } else { 5158 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5159 isrowb = *rowb; iscolb = *colb; 5160 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5161 bseq[0] = *B_seq; 5162 } 5163 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5164 *B_seq = bseq[0]; 5165 ierr = PetscFree(bseq);CHKERRQ(ierr); 5166 if (!rowb) { 5167 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5168 } else { 5169 *rowb = isrowb; 5170 } 5171 if (!colb) { 5172 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5173 } else { 5174 *colb = iscolb; 5175 } 5176 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5177 PetscFunctionReturn(0); 5178 } 5179 5180 #undef __FUNCT__ 5181 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5182 /* 5183 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5184 of the OFF-DIAGONAL portion of local A 5185 5186 Collective on Mat 5187 5188 Input Parameters: 5189 + A,B - the matrices in mpiaij format 5190 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5191 5192 Output Parameter: 5193 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5194 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5195 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5196 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5197 5198 Level: developer 5199 5200 */ 5201 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5202 { 5203 VecScatter_MPI_General *gen_to,*gen_from; 5204 PetscErrorCode ierr; 5205 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5206 Mat_SeqAIJ *b_oth; 5207 VecScatter ctx =a->Mvctx; 5208 MPI_Comm comm; 5209 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5210 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5211 PetscScalar *rvalues,*svalues; 5212 MatScalar *b_otha,*bufa,*bufA; 5213 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5214 MPI_Request *rwaits = NULL,*swaits = NULL; 5215 MPI_Status *sstatus,rstatus; 5216 PetscMPIInt jj; 5217 PetscInt *cols,sbs,rbs; 5218 PetscScalar *vals; 5219 5220 PetscFunctionBegin; 5221 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5222 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5223 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5224 } 5225 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5226 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5227 5228 gen_to = (VecScatter_MPI_General*)ctx->todata; 5229 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5230 rvalues = gen_from->values; /* holds the length of receiving row */ 5231 svalues = gen_to->values; /* holds the length of sending row */ 5232 nrecvs = gen_from->n; 5233 nsends = gen_to->n; 5234 5235 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5236 srow = gen_to->indices; /* local row index to be sent */ 5237 sstarts = gen_to->starts; 5238 sprocs = gen_to->procs; 5239 sstatus = gen_to->sstatus; 5240 sbs = gen_to->bs; 5241 rstarts = gen_from->starts; 5242 rprocs = gen_from->procs; 5243 rbs = gen_from->bs; 5244 5245 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5246 if (scall == MAT_INITIAL_MATRIX) { 5247 /* i-array */ 5248 /*---------*/ 5249 /* post receives */ 5250 for (i=0; i<nrecvs; i++) { 5251 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5252 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5253 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5254 } 5255 5256 /* pack the outgoing message */ 5257 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5258 5259 sstartsj[0] = 0; 5260 rstartsj[0] = 0; 5261 len = 0; /* total length of j or a array to be sent */ 5262 k = 0; 5263 for (i=0; i<nsends; i++) { 5264 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5265 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5266 for (j=0; j<nrows; j++) { 5267 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5268 for (l=0; l<sbs; l++) { 5269 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5270 5271 rowlen[j*sbs+l] = ncols; 5272 5273 len += ncols; 5274 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5275 } 5276 k++; 5277 } 5278 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5279 5280 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5281 } 5282 /* recvs and 
sends of i-array are completed */ 5283 i = nrecvs; 5284 while (i--) { 5285 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5286 } 5287 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5288 5289 /* allocate buffers for sending j and a arrays */ 5290 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5291 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5292 5293 /* create i-array of B_oth */ 5294 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5295 5296 b_othi[0] = 0; 5297 len = 0; /* total length of j or a array to be received */ 5298 k = 0; 5299 for (i=0; i<nrecvs; i++) { 5300 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5301 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5302 for (j=0; j<nrows; j++) { 5303 b_othi[k+1] = b_othi[k] + rowlen[j]; 5304 len += rowlen[j]; k++; 5305 } 5306 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5307 } 5308 5309 /* allocate space for j and a arrrays of B_oth */ 5310 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5311 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5312 5313 /* j-array */ 5314 /*---------*/ 5315 /* post receives of j-array */ 5316 for (i=0; i<nrecvs; i++) { 5317 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5318 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5319 } 5320 5321 /* pack the outgoing message j-array */ 5322 k = 0; 5323 for (i=0; i<nsends; i++) { 5324 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5325 bufJ = bufj+sstartsj[i]; 5326 for (j=0; j<nrows; j++) { 5327 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5328 for (ll=0; ll<sbs; ll++) { 5329 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5330 for (l=0; l<ncols; l++) { 5331 *bufJ++ = cols[l]; 5332 } 5333 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5334 } 5335 } 5336 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5337 } 5338 5339 /* recvs and sends of j-array are completed */ 5340 i = nrecvs; 5341 while (i--) { 5342 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5343 } 5344 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5345 } else if (scall == MAT_REUSE_MATRIX) { 5346 sstartsj = *startsj_s; 5347 rstartsj = *startsj_r; 5348 bufa = *bufa_ptr; 5349 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5350 b_otha = b_oth->a; 5351 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5352 5353 /* a-array */ 5354 /*---------*/ 5355 /* post receives of a-array */ 5356 for (i=0; i<nrecvs; i++) { 5357 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5358 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5359 } 5360 5361 /* pack the outgoing message a-array */ 5362 k = 0; 5363 for (i=0; i<nsends; i++) { 5364 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5365 bufA = bufa+sstartsj[i]; 5366 for (j=0; j<nrows; j++) { 5367 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5368 for (ll=0; ll<sbs; ll++) { 5369 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5370 for (l=0; l<ncols; l++) { 5371 *bufA++ = vals[l]; 5372 } 5373 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5374 } 5375 } 5376 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5377 } 5378 /* recvs and sends of a-array are completed */ 5379 i = nrecvs; 5380 while (i--) { 5381 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5382 } 5383 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5384 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5385 5386 if (scall == MAT_INITIAL_MATRIX) { 5387 /* put together the new matrix */ 5388 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5389 5390 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5391 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5392 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5393 b_oth->free_a = PETSC_TRUE; 5394 b_oth->free_ij = PETSC_TRUE; 5395 b_oth->nonew = 0; 5396 5397 ierr = PetscFree(bufj);CHKERRQ(ierr); 5398 if (!startsj_s || !bufa_ptr) { 5399 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5400 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5401 } else { 5402 *startsj_s = sstartsj; 5403 *startsj_r = rstartsj; 5404 *bufa_ptr = bufa; 5405 } 5406 } 5407 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5408 PetscFunctionReturn(0); 5409 } 5410 5411 #undef __FUNCT__ 5412 #define __FUNCT__ "MatGetCommunicationStructs" 5413 /*@C 5414 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5415 5416 Not Collective 5417 5418 Input Parameters: 5419 . A - The matrix in mpiaij format 5420 5421 Output Parameter: 5422 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5423 . colmap - A map from global column index to local index into lvec 5424 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5425 5426 Level: developer 5427 5428 @*/ 5429 #if defined(PETSC_USE_CTABLE) 5430 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5431 #else 5432 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5433 #endif 5434 { 5435 Mat_MPIAIJ *a; 5436 5437 PetscFunctionBegin; 5438 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5439 PetscValidPointer(lvec, 2); 5440 PetscValidPointer(colmap, 3); 5441 PetscValidPointer(multScatter, 4); 5442 a = (Mat_MPIAIJ*) A->data; 5443 if (lvec) *lvec = a->lvec; 5444 if (colmap) *colmap = a->colmap; 5445 if (multScatter) *multScatter = a->Mvctx; 5446 PetscFunctionReturn(0); 5447 } 5448 5449 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5452 5453 #undef __FUNCT__ 5454 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5455 /* 5456 Computes (B'*A')' since computing B*A directly is untenable 5457 5458 n p p 5459 ( ) ( ) ( ) 5460 m ( A ) * n ( B ) = m ( C ) 5461 ( ) ( ) ( ) 5462 5463 */ 5464 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5465 { 5466 PetscErrorCode ierr; 5467 Mat At,Bt,Ct; 5468 5469 PetscFunctionBegin; 5470 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5471 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5472 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5473 ierr = MatDestroy(&At);CHKERRQ(ierr); 
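  /* Ct = Bt*At = (A*B)^T now holds the product, so the transposed operands are no longer needed;
     Ct is transposed back into the caller-provided C (MAT_REUSE_MATRIX) below and then destroyed. */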
5474 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5475 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5476 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5477 PetscFunctionReturn(0); 5478 } 5479 5480 #undef __FUNCT__ 5481 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5482 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5483 { 5484 PetscErrorCode ierr; 5485 PetscInt m=A->rmap->n,n=B->cmap->n; 5486 Mat Cmat; 5487 5488 PetscFunctionBegin; 5489 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5490 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5491 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5492 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5493 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5494 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5495 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5496 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5497 5498 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5499 5500 *C = Cmat; 5501 PetscFunctionReturn(0); 5502 } 5503 5504 /* ----------------------------------------------------------------*/ 5505 #undef __FUNCT__ 5506 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5507 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5508 { 5509 PetscErrorCode ierr; 5510 5511 PetscFunctionBegin; 5512 if (scall == MAT_INITIAL_MATRIX) { 5513 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5514 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5515 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5516 } 5517 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5518 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5519 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5520 PetscFunctionReturn(0); 5521 } 5522 5523 #if defined(PETSC_HAVE_MUMPS) 5524 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5525 #endif 5526 #if defined(PETSC_HAVE_PASTIX) 5527 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5528 #endif 5529 #if defined(PETSC_HAVE_SUPERLU_DIST) 5530 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5531 #endif 5532 #if defined(PETSC_HAVE_CLIQUE) 5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5534 #endif 5535 5536 /*MC 5537 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5538 5539 Options Database Keys: 5540 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5541 5542 Level: beginner 5543 5544 .seealso: MatCreateAIJ() 5545 M*/ 5546 5547 #undef __FUNCT__ 5548 #define __FUNCT__ "MatCreate_MPIAIJ" 5549 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5550 { 5551 Mat_MPIAIJ *b; 5552 PetscErrorCode ierr; 5553 PetscMPIInt size; 5554 5555 PetscFunctionBegin; 5556 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5557 5558 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5559 B->data = (void*)b; 5560 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5561 B->assembled = PETSC_FALSE; 5562 B->insertmode = NOT_SET_VALUES; 5563 b->size = size; 5564 5565 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5566 5567 /* build cache for off array entries formed */ 5568 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5569 5570 b->donotstash = PETSC_FALSE; 5571 b->colmap = 0; 5572 b->garray = 0; 5573 b->roworiented = PETSC_TRUE; 5574 5575 /* stuff used for matrix vector multiply */ 5576 b->lvec = NULL; 5577 b->Mvctx = NULL; 5578 5579 /* stuff for MatGetRow() */ 5580 b->rowindices = 0; 5581 b->rowvalues = 0; 5582 b->getrowactive = PETSC_FALSE; 5583 5584 /* flexible pointer used in CUSP/CUSPARSE classes */ 5585 b->spptr = NULL; 5586 5587 #if defined(PETSC_HAVE_MUMPS) 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5589 #endif 5590 #if defined(PETSC_HAVE_PASTIX) 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5592 #endif 5593 #if defined(PETSC_HAVE_SUPERLU_DIST) 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5595 #endif 5596 #if defined(PETSC_HAVE_CLIQUE) 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5598 #endif 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5610 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5613 PetscFunctionReturn(0); 5614 } 5615 5616 #undef __FUNCT__ 5617 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5618 /*@ 5619 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5620 and "off-diagonal" part of the matrix in CSR format. 5621 5622 Collective on MPI_Comm 5623 5624 Input Parameters: 5625 + comm - MPI communicator 5626 . m - number of local rows (Cannot be PETSC_DECIDE) 5627 . n - This value should be the same as the local size used in creating the 5628 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5629 calculated if N is given) For square matrices n is almost always m. 5630 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5631 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5632 . i - row indices for "diagonal" portion of matrix 5633 . j - column indices 5634 . a - matrix values 5635 . oi - row indices for "off-diagonal" portion of matrix 5636 . oj - column indices 5637 - oa - matrix values 5638 5639 Output Parameter: 5640 . mat - the matrix 5641 5642 Level: advanced 5643 5644 Notes: 5645 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5646 must free the arrays once the matrix has been destroyed and not before. 5647 5648 The i and j indices are 0 based 5649 5650 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5651 5652 This sets local rows and cannot be used to set off-processor values. 5653 5654 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5655 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5656 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5657 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5658 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5659 communication if it is known that only local entries will be set. 
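   Example (for illustration only; the matrix values and layout below are made up): for a 4 by 4 global matrix on two
   processes, each owning two rows and two columns,
.vb
      1 0 | 2 0
      0 3 | 0 4
      ---------
      5 0 | 6 0
      0 7 | 0 8
.ve
   process 0 would pass the "diagonal" block (m = n = 2) in CSR form with column indices local to that block, and the
   "off-diagonal" block in CSR form with global column indices,
.vb
      i  = {0,1,2}   j  = {0,1}   a  = {1,3}
      oi = {0,1,2}   oj = {2,3}   oa = {2,4}
.ve
   and a sketch of the call made on every process is
.vb
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve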
5660 5661 .keywords: matrix, aij, compressed row, sparse, parallel 5662 5663 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5664 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5665 @*/ 5666 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5667 { 5668 PetscErrorCode ierr; 5669 Mat_MPIAIJ *maij; 5670 5671 PetscFunctionBegin; 5672 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5673 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5674 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5675 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5676 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5677 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5678 maij = (Mat_MPIAIJ*) (*mat)->data; 5679 5680 (*mat)->preallocated = PETSC_TRUE; 5681 5682 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5683 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5684 5685 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5686 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5687 5688 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 5693 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5696 PetscFunctionReturn(0); 5697 } 5698 5699 /* 5700 Special version for direct calls from Fortran 5701 */ 5702 #include <petsc-private/fortranimpl.h> 5703 5704 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5705 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5706 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5707 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5708 #endif 5709 5710 /* Change these macros so can be used in void function */ 5711 #undef CHKERRQ 5712 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5713 #undef SETERRQ2 5714 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5715 #undef SETERRQ3 5716 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5717 #undef SETERRQ 5718 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5719 5720 #undef __FUNCT__ 5721 #define __FUNCT__ "matsetvaluesmpiaij_" 5722 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5723 { 5724 Mat mat = *mmat; 5725 PetscInt m = *mm, n = *mn; 5726 InsertMode addv = *maddv; 5727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5728 PetscScalar value; 5729 PetscErrorCode ierr; 5730 5731 MatCheckPreallocated(mat,1); 5732 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5733 5734 #if defined(PETSC_USE_DEBUG) 5735 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5736 #endif 5737 { 5738 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5739 
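  /* [rstart,rend) and [cstart,cend) are this process's owned row and column ranges: a value whose row and
     column are both owned goes into the diagonal block aij->A, a value in an owned row but unowned column
     goes into the off-diagonal block aij->B, and values for unowned rows are stashed for exchange at assembly. */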
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5740 PetscBool roworiented = aij->roworiented; 5741 5742 /* Some Variables required in the macro */ 5743 Mat A = aij->A; 5744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5745 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5746 MatScalar *aa = a->a; 5747 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5748 Mat B = aij->B; 5749 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5750 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5751 MatScalar *ba = b->a; 5752 5753 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5754 PetscInt nonew = a->nonew; 5755 MatScalar *ap1,*ap2; 5756 5757 PetscFunctionBegin; 5758 for (i=0; i<m; i++) { 5759 if (im[i] < 0) continue; 5760 #if defined(PETSC_USE_DEBUG) 5761 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5762 #endif 5763 if (im[i] >= rstart && im[i] < rend) { 5764 row = im[i] - rstart; 5765 lastcol1 = -1; 5766 rp1 = aj + ai[row]; 5767 ap1 = aa + ai[row]; 5768 rmax1 = aimax[row]; 5769 nrow1 = ailen[row]; 5770 low1 = 0; 5771 high1 = nrow1; 5772 lastcol2 = -1; 5773 rp2 = bj + bi[row]; 5774 ap2 = ba + bi[row]; 5775 rmax2 = bimax[row]; 5776 nrow2 = bilen[row]; 5777 low2 = 0; 5778 high2 = nrow2; 5779 5780 for (j=0; j<n; j++) { 5781 if (roworiented) value = v[i*n+j]; 5782 else value = v[i+j*m]; 5783 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5784 if (in[j] >= cstart && in[j] < cend) { 5785 col = in[j] - cstart; 5786 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5787 } else if (in[j] < 0) continue; 5788 #if defined(PETSC_USE_DEBUG) 5789 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5790 #endif 5791 else { 5792 if (mat->was_assembled) { 5793 if (!aij->colmap) { 5794 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5795 } 5796 #if defined(PETSC_USE_CTABLE) 5797 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5798 col--; 5799 #else 5800 col = aij->colmap[in[j]] - 1; 5801 #endif 5802 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5803 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5804 col = in[j]; 5805 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5806 B = aij->B; 5807 b = (Mat_SeqAIJ*)B->data; 5808 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5809 rp2 = bj + bi[row]; 5810 ap2 = ba + bi[row]; 5811 rmax2 = bimax[row]; 5812 nrow2 = bilen[row]; 5813 low2 = 0; 5814 high2 = nrow2; 5815 bm = aij->B->rmap->n; 5816 ba = b->a; 5817 } 5818 } else col = in[j]; 5819 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5820 } 5821 } 5822 } else if (!aij->donotstash) { 5823 if (roworiented) { 5824 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5825 } else { 5826 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5827 } 5828 } 5829 } 5830 } 5831 PetscFunctionReturnVoid(); 5832 } 5833 5834