1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && !mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. 
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscSF sf; 748 PetscInt *lrows; 749 PetscSFNode *rrows; 750 PetscInt r, p = 0, len = 0; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 /* Create SF where leaves are input rows and roots are owned rows */ 755 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 756 for (r = 0; r < n; ++r) lrows[r] = -1; 757 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 758 for (r = 0; r < N; ++r) { 759 const PetscInt idx = rows[r]; 760 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 761 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 762 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 763 } 764 if (A->nooffproczerorows) { 765 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 766 lrows[len++] = idx - owners[p]; 767 } else { 768 rrows[r].rank = p; 769 rrows[r].index = rows[r] - owners[p]; 770 } 771 } 772 if (!A->nooffproczerorows) { 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 } 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 796 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 797 } else if (diag != 0.0) { 798 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 799 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 800 for (r = 0; r < len; ++r) { 801 const PetscInt row = lrows[r] + A->rmap->rstart; 802 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 803 } 804 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 } else { 807 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 } 809 ierr = PetscFree(lrows);CHKERRQ(ierr); 810 811 /* only change matrix nonzero state if pattern was allowed to be changed */ 812 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 813 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 814 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt n = A->rmap->n; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 837 PetscFunctionBegin; 838 /* Create SF where leaves are input rows and roots are owned rows */ 839 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 840 for (r = 0; r < n; ++r) lrows[r] = -1; 841 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 842 for (r = 0; r < N; ++r) { 843 const PetscInt idx = rows[r]; 844 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 845 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 846 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 847 } 848 rrows[r].rank = p; 849 rrows[r].index = rows[r] - 
owners[p]; 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 928 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 929 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMult_MPIAIJ" 936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 937 { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 939 PetscErrorCode ierr; 940 PetscInt nt; 941 942 PetscFunctionBegin; 943 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 944 if (nt != 
A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 947 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 955 { 956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMultAdd_MPIAIJ" 966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 974 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscBool merged; 986 987 PetscFunctionBegin; 988 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 989 /* do nondiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 if (!merged) { 992 /* send it on its way */ 993 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* receive remote parts: note this assumes the values are not actually */ 997 /* added in yy until the next line, */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } else { 1000 /* do local part */ 1001 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1002 /* send it on its way */ 1003 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* values actually were received in the Begin() but we need to call this nop */ 1005 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscMPIInt size; 1021 1022 PetscFunctionBegin; 1023 /* Easy test: symmetric diagonal block */ 1024 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1025 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1026 if (!*f) PetscFunctionReturn(0); 1027 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1032 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1034 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1035 for (i=0; i<first; i++) notme[i] = i; 1036 for (i=last; i<M; i++) notme[i-last+first] = i; 1037 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1038 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1040 Aoff = Aoffs[0]; 1041 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1042 Boff = Boffs[0]; 1043 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1044 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1045 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1046 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1047 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1048 ierr = PetscFree(notme);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1078 { 1079 PetscErrorCode ierr; 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 1082 PetscFunctionBegin; 1083 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1084 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1085 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatScale_MPIAIJ" 1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1098 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatDestroy_Redundant" 1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1105 { 1106 PetscErrorCode ierr; 1107 Mat_Redundant *redund = *redundant; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 *redundant = NULL; 1112 if (redund){ 1113 if (redund->matseq) { /* via MatGetSubMatrices() */ 1114 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1115 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1116 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1117 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1118 } else { 1119 ierr = 
PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1120 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1121 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1122 for (i=0; i<redund->nrecvs; i++) { 1123 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1124 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1127 } 1128 1129 if (redund->psubcomm) { 1130 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1131 } 1132 ierr = PetscFree(redund);CHKERRQ(ierr); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatDestroy_MPIAIJ" 1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1140 { 1141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 #if defined(PETSC_USE_LOG) 1146 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1147 #endif 1148 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1149 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1150 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1151 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1152 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1153 #if defined(PETSC_USE_CTABLE) 1154 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1155 #else 1156 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1157 #endif 1158 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1159 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1160 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1161 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1162 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1184 PetscErrorCode ierr; 1185 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1186 int fd; 1187 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1188 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1189 PetscScalar *column_values; 1190 PetscInt message_count,flowcontrolcount; 1191 FILE *file; 1192 1193 PetscFunctionBegin; 1194 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1196 nz = A->nz + 
B->nz; 1197 if (!rank) { 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = mat->rmap->N; 1200 header[2] = mat->cmap->N; 1201 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1204 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 /* get largest number of rows any processor has */ 1206 rlen = mat->rmap->n; 1207 range = mat->rmap->range; 1208 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1209 } else { 1210 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 rlen = mat->rmap->n; 1212 } 1213 1214 /* load up the local row counts */ 1215 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1216 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1217 1218 /* store the row lengths to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1222 for (i=1; i<size; i++) { 1223 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1224 rlen = range[i+1] - range[i]; 1225 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 } 1228 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1229 } else { 1230 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1231 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1235 1236 /* load up the local column indices */ 1237 nzmax = nz; /* th processor needs space a largest processor needs */ 1238 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1240 cnt = 0; 1241 for (i=0; i<mat->rmap->n; i++) { 1242 for (j=B->i[i]; j<B->i[i+1]; j++) { 1243 if ((col = garray[B->j[j]]) > cstart) break; 1244 column_indices[cnt++] = col; 1245 } 1246 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1247 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1248 } 1249 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1250 1251 /* store the column indices to the file */ 1252 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1253 if (!rank) { 1254 MPI_Status status; 1255 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1256 for (i=1; i<size; i++) { 1257 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1258 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1259 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1260 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = 
PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1264 } else { 1265 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1266 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1271 1272 /* load up the local column values */ 1273 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1274 cnt = 0; 1275 for (i=0; i<mat->rmap->n; i++) { 1276 for (j=B->i[i]; j<B->i[i+1]; j++) { 1277 if (garray[B->j[j]] > cstart) break; 1278 column_values[cnt++] = B->a[j]; 1279 } 1280 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1281 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1282 } 1283 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1284 1285 /* store the column values to the file */ 1286 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1287 if (!rank) { 1288 MPI_Status status; 1289 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1290 for (i=1; i<size; i++) { 1291 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1292 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1293 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1294 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1296 } 1297 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1298 } else { 1299 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1300 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1301 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(column_values);CHKERRQ(ierr); 1305 1306 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1307 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #include <petscdraw.h> 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank = aij->rank,size = aij->size; 1319 PetscBool isdraw,iascii,isbinary; 1320 PetscViewer sviewer; 1321 PetscViewerFormat format; 1322 1323 PetscFunctionBegin; 1324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1327 if (iascii) { 1328 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1329 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1330 MatInfo info; 1331 PetscBool inodes; 1332 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1334 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1335 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1337 if (!inodes) { 1338 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1339 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1342 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1343 } 1344 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1346 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1351 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1354 PetscInt inodecount,inodelimit,*inodes; 1355 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1356 if (inodes) { 1357 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1363 PetscFunctionReturn(0); 1364 } 1365 } else if (isbinary) { 1366 if (size == 1) { 1367 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1368 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1371 } 1372 PetscFunctionReturn(0); 1373 } else if (isdraw) { 1374 PetscDraw draw; 1375 PetscBool isnull; 1376 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1377 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1378 } 1379 1380 { 1381 /* assemble the entire matrix onto first processor. 
*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 const char *matname; 1387 1388 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1389 if (!rank) { 1390 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1391 } else { 1392 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1393 } 1394 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1395 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1396 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1397 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1398 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1399 1400 /* copy over the A part */ 1401 Aloc = (Mat_SeqAIJ*)aij->A->data; 1402 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1403 row = mat->rmap->rstart; 1404 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1405 for (i=0; i<m; i++) { 1406 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1407 row++; 1408 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1409 } 1410 aj = Aloc->j; 1411 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1412 1413 /* copy over the B part */ 1414 Aloc = (Mat_SeqAIJ*)aij->B->data; 1415 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1416 row = mat->rmap->rstart; 1417 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1418 ct = cols; 1419 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1420 for (i=0; i<m; i++) { 1421 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1422 row++; 1423 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1424 } 1425 ierr = PetscFree(ct);CHKERRQ(ierr); 1426 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 /* 1429 Everyone has to call to draw the matrix since the graphics waits are 1430 synchronized across all processors that share the PetscDraw object 1431 */ 1432 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1433 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1434 if (!rank) { 1435 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1436 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1439 ierr = MatDestroy(&A);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatView_MPIAIJ" 1446 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1447 { 1448 PetscErrorCode ierr; 1449 PetscBool iascii,isdraw,issocket,isbinary; 1450 1451 PetscFunctionBegin; 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1456 if (iascii || isdraw || isbinary || issocket) { 1457 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatSOR_MPIAIJ" 1464 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 
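/*
   Parallel SOR is realized here as a sequence of process-local sweeps; a true SOR
   across process boundaries is not supported (see the error at the end of this
   routine). For each outer iteration the current iterate xx is scattered into
   mat->lvec to obtain the ghost values, the right-hand side is updated as
   bb1 = bb - B*lvec (B being the off-diagonal block), and the requested sweep is
   applied to the local diagonal block A only. The local-sweep variants therefore
   amount to a block Jacobi iteration with SOR applied inside each block.
*/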
1465 { 1466 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1467 PetscErrorCode ierr; 1468 Vec bb1 = 0; 1469 PetscBool hasop; 1470 1471 PetscFunctionBegin; 1472 if (flag == SOR_APPLY_UPPER) { 1473 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1478 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1479 } 1480 1481 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1482 if (flag & SOR_ZERO_INITIAL_GUESS) { 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1484 its--; 1485 } 1486 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1499 if (flag & SOR_ZERO_INITIAL_GUESS) { 1500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1501 its--; 1502 } 1503 while (its--) { 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 1507 /* update rhs: bb1 = bb - B*x */ 1508 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1509 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1510 1511 /* local sweep */ 1512 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1513 } 1514 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1515 if (flag & SOR_ZERO_INITIAL_GUESS) { 1516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1517 its--; 1518 } 1519 while (its--) { 1520 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 1523 /* update rhs: bb1 = bb - B*x */ 1524 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1525 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1526 1527 /* local sweep */ 1528 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1529 } 1530 } else if (flag & SOR_EISENSTAT) { 1531 Vec xx1; 1532 1533 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1535 1536 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 if (!mat->diag) { 1539 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1540 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1541 } 1542 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1543 if (hasop) { 1544 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1545 } else { 1546 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1547 } 1548 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 
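      /*
         Eisenstat's trick: xx currently holds the result of the local backward sweep
         with zero initial guess, and at this point bb1 = bb + ((omega-2)/omega)*D*xx,
         where D is the (block) diagonal. The next line adds the off-process coupling
         B*lvec; the local forward sweep below, again with zero initial guess, then
         produces the correction xx1 that is added to xx, so the SSOR-like application
         is obtained from two triangular solves without a separate multiplication by
         the full matrix.
      */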
1549 1550 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1551 1552 /* local sweep */ 1553 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1554 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1555 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1556 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1557 1558 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatPermute_MPIAIJ" 1564 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1565 { 1566 Mat aA,aB,Aperm; 1567 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1568 PetscScalar *aa,*ba; 1569 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1570 PetscSF rowsf,sf; 1571 IS parcolp = NULL; 1572 PetscBool done; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1577 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1578 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1579 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1580 1581 /* Invert row permutation to find out where my rows should go */ 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1585 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1586 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1587 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1588 1589 /* Invert column permutation to find out where my columns should go */ 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1593 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1594 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1596 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1597 1598 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1599 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1600 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1601 1602 /* Find out where my gcols should go */ 1603 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1604 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1605 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1606 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1607 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1608 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1609 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1610 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1611 1612 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1613 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1614 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1615 for (i=0; i<m; i++) { 1616 PetscInt row = rdest[i],rowner; 1617 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1618 for (j=ai[i]; j<ai[i+1]; j++) { 1619 PetscInt cowner,col = 
cdest[aj[j]]; 1620 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1621 if (rowner == cowner) dnnz[i]++; 1622 else onnz[i]++; 1623 } 1624 for (j=bi[i]; j<bi[i+1]; j++) { 1625 PetscInt cowner,col = gcdest[bj[j]]; 1626 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1627 if (rowner == cowner) dnnz[i]++; 1628 else onnz[i]++; 1629 } 1630 } 1631 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1632 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1633 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1634 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1635 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1636 1637 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1638 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1639 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) { 1641 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1642 PetscInt j0,rowlen; 1643 rowlen = ai[i+1] - ai[i]; 1644 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1645 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1646 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1647 } 1648 rowlen = bi[i+1] - bi[i]; 1649 for (j0=j=0; j<rowlen; j0=j) { 1650 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1651 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } 1654 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1655 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1656 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1657 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1658 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1659 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1660 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1661 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1662 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1663 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1664 *B = Aperm; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1670 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1671 { 1672 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1673 Mat A = mat->A,B = mat->B; 1674 PetscErrorCode ierr; 1675 PetscReal isend[5],irecv[5]; 1676 1677 PetscFunctionBegin; 1678 info->block_size = 1.0; 1679 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1680 1681 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1682 isend[3] = info->memory; isend[4] = info->mallocs; 1683 1684 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1685 1686 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1687 isend[3] += info->memory; isend[4] += info->mallocs; 1688 if (flag == MAT_LOCAL) { 1689 info->nz_used = isend[0]; 1690 info->nz_allocated = isend[1]; 1691 info->nz_unneeded = isend[2]; 1692 info->memory = isend[3]; 1693 info->mallocs = isend[4]; 1694 } else if (flag == MAT_GLOBAL_MAX) { 1695 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1696 1697 info->nz_used = irecv[0]; 1698 info->nz_allocated = irecv[1]; 1699 info->nz_unneeded = irecv[2]; 1700 info->memory = irecv[3]; 1701 info->mallocs = irecv[4]; 1702 } else if (flag == MAT_GLOBAL_SUM) { 1703 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1704 1705 info->nz_used = irecv[0]; 1706 info->nz_allocated = irecv[1]; 1707 info->nz_unneeded = irecv[2]; 1708 info->memory = irecv[3]; 1709 info->mallocs = irecv[4]; 1710 } 1711 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1712 info->fill_ratio_needed = 0; 1713 info->factor_mallocs = 0; 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatSetOption_MPIAIJ" 1719 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1720 { 1721 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 switch (op) { 1726 case MAT_NEW_NONZERO_LOCATIONS: 1727 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1728 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1729 case MAT_KEEP_NONZERO_PATTERN: 1730 case MAT_NEW_NONZERO_LOCATION_ERR: 1731 case MAT_USE_INODES: 1732 case MAT_IGNORE_ZERO_ENTRIES: 1733 MatCheckPreallocated(A,1); 1734 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1735 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_ROW_ORIENTED: 1738 a->roworiented = flg; 1739 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_NEW_DIAGONALS: 1744 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1745 break; 1746 case MAT_IGNORE_OFF_PROC_ENTRIES: 1747 a->donotstash = flg; 1748 break; 1749 case MAT_SPD: 1750 A->spd_set = PETSC_TRUE; 1751 A->spd = flg; 1752 if (flg) { 1753 A->symmetric = PETSC_TRUE; 1754 A->structurally_symmetric = PETSC_TRUE; 1755 A->symmetric_set = PETSC_TRUE; 1756 A->structurally_symmetric_set = PETSC_TRUE; 1757 } 1758 break; 1759 case MAT_SYMMETRIC: 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_STRUCTURALLY_SYMMETRIC: 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_HERMITIAN: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 case MAT_SYMMETRY_ETERNAL: 1769 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1770 break; 1771 default: 1772 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatGetRow_MPIAIJ" 1779 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1780 { 1781 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1782 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1783 PetscErrorCode ierr; 1784 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1785 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1786 PetscInt *cmap,*idx_p; 1787 1788 PetscFunctionBegin; 1789 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1790 mat->getrowactive = PETSC_TRUE; 1791 1792 if (!mat->rowvalues && (idx || v)) { 1793 /* 1794 allocate enough space to hold information from the longest row. 
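       The rowvalues/rowindices work arrays are sized to the largest combined row
       length of the diagonal (A) and off-diagonal (B) blocks and are reused by every
       subsequent MatGetRow() call on this matrix until the rows are restored. A
       typical caller loops over the locally owned rows only, e.g. (sketch):

          MatGetOwnershipRange(mat,&rstart,&rend);
          for (row=rstart; row<rend; row++) {
            MatGetRow(mat,row,&ncols,&cols,&vals);
            ... use ncols, cols, vals ...
            MatRestoreRow(mat,row,&ncols,&cols,&vals);
          }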
1795 */ 1796 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1797 PetscInt max = 1,tmp; 1798 for (i=0; i<matin->rmap->n; i++) { 1799 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1800 if (max < tmp) max = tmp; 1801 } 1802 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1803 } 1804 1805 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1806 lrow = row - rstart; 1807 1808 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1809 if (!v) {pvA = 0; pvB = 0;} 1810 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1811 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1812 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1813 nztot = nzA + nzB; 1814 1815 cmap = mat->garray; 1816 if (v || idx) { 1817 if (nztot) { 1818 /* Sort by increasing column numbers, assuming A and B already sorted */ 1819 PetscInt imark = -1; 1820 if (v) { 1821 *v = v_p = mat->rowvalues; 1822 for (i=0; i<nzB; i++) { 1823 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1824 else break; 1825 } 1826 imark = i; 1827 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1828 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1829 } 1830 if (idx) { 1831 *idx = idx_p = mat->rowindices; 1832 if (imark > -1) { 1833 for (i=0; i<imark; i++) { 1834 idx_p[i] = cmap[cworkB[i]]; 1835 } 1836 } else { 1837 for (i=0; i<nzB; i++) { 1838 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1839 else break; 1840 } 1841 imark = i; 1842 } 1843 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1844 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1845 } 1846 } else { 1847 if (idx) *idx = 0; 1848 if (v) *v = 0; 1849 } 1850 } 1851 *nz = nztot; 1852 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1853 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1859 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1860 { 1861 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1862 1863 PetscFunctionBegin; 1864 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1865 aij->getrowactive = PETSC_FALSE; 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatNorm_MPIAIJ" 1871 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1872 { 1873 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1874 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1875 PetscErrorCode ierr; 1876 PetscInt i,j,cstart = mat->cmap->rstart; 1877 PetscReal sum = 0.0; 1878 MatScalar *v; 1879 1880 PetscFunctionBegin; 1881 if (aij->size == 1) { 1882 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1883 } else { 1884 if (type == NORM_FROBENIUS) { 1885 v = amat->a; 1886 for (i=0; i<amat->nz; i++) { 1887 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1888 } 1889 v = bmat->a; 1890 for (i=0; i<bmat->nz; i++) { 1891 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1892 } 1893 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1894 *norm = PetscSqrtReal(*norm); 1895 } else if (type == NORM_1) { /* max column norm */ 1896 PetscReal *tmp,*tmp2; 1897 PetscInt *jj,*garray = aij->garray; 1898 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1899 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1900 *norm = 0.0; 1901 v = amat->a; jj = amat->j; 1902 for (j=0; j<amat->nz; j++) { 1903 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1904 } 1905 v = bmat->a; jj = bmat->j; 1906 for (j=0; j<bmat->nz; j++) { 1907 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1908 } 1909 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1910 for (j=0; j<mat->cmap->N; j++) { 1911 if (tmp2[j] > *norm) *norm = tmp2[j]; 1912 } 1913 ierr = PetscFree(tmp);CHKERRQ(ierr); 1914 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1915 } else if (type == NORM_INFINITY) { /* max row norm */ 1916 PetscReal ntemp = 0.0; 1917 for (j=0; j<aij->A->rmap->n; j++) { 1918 v = amat->a + amat->i[j]; 1919 sum = 0.0; 1920 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1921 sum += PetscAbsScalar(*v); v++; 1922 } 1923 v = bmat->a + bmat->i[j]; 1924 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1925 sum += PetscAbsScalar(*v); v++; 1926 } 1927 if (sum > ntemp) ntemp = sum; 1928 } 1929 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1930 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatTranspose_MPIAIJ" 1937 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1938 { 1939 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1940 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1941 PetscErrorCode ierr; 1942 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1943 PetscInt cstart = A->cmap->rstart,ncol; 1944 Mat B; 1945 MatScalar *array; 1946 1947 PetscFunctionBegin; 1948 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1949 1950 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1951 ai = Aloc->i; aj = Aloc->j; 1952 bi = Bloc->i; bj = Bloc->j; 1953 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1954 PetscInt *d_nnz,*g_nnz,*o_nnz; 1955 PetscSFNode *oloc; 1956 PETSC_UNUSED PetscSF sf; 1957 1958 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1959 /* compute d_nnz for preallocation */ 1960 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1961 for (i=0; i<ai[ma]; i++) { 1962 d_nnz[aj[i]]++; 1963 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1964 } 1965 /* compute local off-diagonal contributions */ 1966 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1967 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1968 /* map those to global */ 1969 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1970 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1971 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1972 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1973 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1974 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1975 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1976 1977 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1978 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1979 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1980 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1981 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1982 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1983 } else { 1984 B = *matout; 1985 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1986 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1987 } 1988 1989 /* copy over the A part */ 1990 array = Aloc->a; 1991 row = A->rmap->rstart; 1992 for (i=0; i<ma; i++) { 1993 ncol = ai[i+1]-ai[i]; 1994 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1995 row++; 1996 array += ncol; aj += ncol; 1997 } 1998 aj = Aloc->j; 1999 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2000 2001 /* copy over the B part */ 2002 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2003 array = Bloc->a; 2004 row = A->rmap->rstart; 2005 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2006 cols_tmp = cols; 2007 for (i=0; i<mb; i++) { 2008 ncol = bi[i+1]-bi[i]; 2009 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2010 row++; 2011 array += ncol; cols_tmp += ncol; 2012 } 2013 ierr = PetscFree(cols);CHKERRQ(ierr); 2014 2015 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2016 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2018 *matout = B; 2019 } else { 2020 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2027 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2028 { 2029 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2030 Mat a = aij->A,b = aij->B; 2031 PetscErrorCode ierr; 2032 PetscInt s1,s2,s3; 2033 2034 PetscFunctionBegin; 2035 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2036 if (rr) { 2037 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2038 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2039 /* Overlap communication with computation. 
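         The scatter of rr into aij->lvec is only started here; the left scaling of the
         off-diagonal block and the scaling of the diagonal block below proceed while
         those messages are in flight, and VecScatterEnd() is called just before the
         ghosted values of rr are actually needed to right-scale the off-diagonal
         block. This VecScatterBegin()/VecScatterEnd() split is the usual PETSc idiom
         for hiding communication behind local work.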
*/ 2040 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2041 } 2042 if (ll) { 2043 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2044 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2045 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2046 } 2047 /* scale the diagonal block */ 2048 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2049 2050 if (rr) { 2051 /* Do a scatter end and then right scale the off-diagonal block */ 2052 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2053 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2060 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2061 { 2062 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2063 PetscErrorCode ierr; 2064 2065 PetscFunctionBegin; 2066 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatEqual_MPIAIJ" 2072 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2073 { 2074 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2075 Mat a,b,c,d; 2076 PetscBool flg; 2077 PetscErrorCode ierr; 2078 2079 PetscFunctionBegin; 2080 a = matA->A; b = matA->B; 2081 c = matB->A; d = matB->B; 2082 2083 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2084 if (flg) { 2085 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2086 } 2087 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatCopy_MPIAIJ" 2093 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2094 { 2095 PetscErrorCode ierr; 2096 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2097 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2098 2099 PetscFunctionBegin; 2100 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2101 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2102 /* because of the column compression in the off-processor part of the matrix a->B, 2103 the number of columns in a->B and b->B may be different, hence we cannot call 2104 the MatCopy() directly on the two parts. If need be, we can provide a more 2105 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2106 then copying the submatrices */ 2107 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2108 } else { 2109 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2110 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatSetUp_MPIAIJ" 2117 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2118 { 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 /* 2127 Computes the number of nonzeros per row needed for preallocation when X and Y 2128 have different nonzero structure. 
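   The count for row i is essentially the size of the union of the column sets of X
   and Y in that row, nnz[i] = |cols(X,i) U cols(Y,i)|, computed by merging the two
   column lists in increasing global column order (via the local-to-global maps
   xltog and yltog) and counting shared columns only once. For example, a row with
   global columns {0,3,7} in X and {3,5} in Y gives nnz[i] = 4.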
2129 */ 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2132 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2133 { 2134 PetscInt i,j,k,nzx,nzy; 2135 2136 PetscFunctionBegin; 2137 /* Set the number of nonzeros in the new matrix */ 2138 for (i=0; i<m; i++) { 2139 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2140 nzx = xi[i+1] - xi[i]; 2141 nzy = yi[i+1] - yi[i]; 2142 nnz[i] = 0; 2143 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2144 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2145 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2146 nnz[i]++; 2147 } 2148 for (; k<nzy; k++) nnz[i]++; 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2156 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2157 { 2158 PetscErrorCode ierr; 2159 PetscInt m = Y->rmap->N; 2160 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2161 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2162 2163 PetscFunctionBegin; 2164 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatAXPY_MPIAIJ" 2170 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2171 { 2172 PetscErrorCode ierr; 2173 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2174 PetscBLASInt bnz,one=1; 2175 Mat_SeqAIJ *x,*y; 2176 2177 PetscFunctionBegin; 2178 if (str == SAME_NONZERO_PATTERN) { 2179 PetscScalar alpha = a; 2180 x = (Mat_SeqAIJ*)xx->A->data; 2181 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2182 y = (Mat_SeqAIJ*)yy->A->data; 2183 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2184 x = (Mat_SeqAIJ*)xx->B->data; 2185 y = (Mat_SeqAIJ*)yy->B->data; 2186 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2187 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2188 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2189 } else if (str == SUBSET_NONZERO_PATTERN) { 2190 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2191 } else { 2192 Mat B; 2193 PetscInt *nnz_d,*nnz_o; 2194 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2195 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2196 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2197 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2198 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2199 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2200 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2201 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2202 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2203 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2204 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2205 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2206 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2207 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatConjugate_MPIAIJ" 2216 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2217 { 2218 #if defined(PETSC_USE_COMPLEX) 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2221 2222 PetscFunctionBegin; 2223 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2224 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2225 #else 2226 PetscFunctionBegin; 2227 #endif 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatRealPart_MPIAIJ" 2233 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2234 { 2235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2236 PetscErrorCode ierr; 2237 2238 PetscFunctionBegin; 2239 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2240 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2246 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2247 { 2248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2253 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #if defined(PETSC_HAVE_PBGL) 2258 2259 #include <boost/parallel/mpi/bsp_process_group.hpp> 2260 #include <boost/graph/distributed/ilu_default_graph.hpp> 2261 #include <boost/graph/distributed/ilu_0_block.hpp> 2262 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2263 #include <boost/graph/distributed/petsc/interface.hpp> 2264 #include <boost/multi_array.hpp> 2265 #include <boost/parallel/distributed_property_map->hpp> 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2269 /* 2270 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2271 */ 2272 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2273 { 2274 namespace petsc = boost::distributed::petsc; 2275 2276 namespace graph_dist = boost::graph::distributed; 2277 using boost::graph::distributed::ilu_default::process_group_type; 2278 using boost::graph::ilu_permuted; 2279 2280 PetscBool row_identity, col_identity; 2281 PetscContainer c; 2282 PetscInt m, n, M, N; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2287 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2288 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2289 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2290 2291 process_group_type pg; 2292 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2293 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2294 lgraph_type& level_graph = *lgraph_p; 2295 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2296 2297 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2298 ilu_permuted(level_graph); 2299 2300 /* put together the new matrix */ 2301 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2302 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2303 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2304 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2305 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2306 ierr = MatSetType(fact, 
((PetscObject)A)->type_name);CHKERRQ(ierr); 2307 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2308 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2309 2310 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2311 ierr = PetscContainerSetPointer(c, lgraph_p); 2312 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2313 ierr = PetscContainerDestroy(&c); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2319 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2320 { 2321 PetscFunctionBegin; 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatSolve_MPIAIJ" 2327 /* 2328 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2329 */ 2330 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2331 { 2332 namespace graph_dist = boost::graph::distributed; 2333 2334 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2335 lgraph_type *lgraph_p; 2336 PetscContainer c; 2337 PetscErrorCode ierr; 2338 2339 PetscFunctionBegin; 2340 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2341 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2342 ierr = VecCopy(b, x);CHKERRQ(ierr); 2343 2344 PetscScalar *array_x; 2345 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2346 PetscInt sx; 2347 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2348 2349 PetscScalar *array_b; 2350 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2351 PetscInt sb; 2352 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2353 2354 lgraph_type& level_graph = *lgraph_p; 2355 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2356 2357 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2358 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2359 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2360 2361 typedef boost::iterator_property_map<array_ref_type::iterator, 2362 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2363 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2364 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2365 2366 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2367 PetscFunctionReturn(0); 2368 } 2369 #endif 2370 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2374 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2375 { 2376 PetscMPIInt rank,size; 2377 MPI_Comm comm; 2378 PetscErrorCode ierr; 2379 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2380 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2381 PetscInt *rowrange = mat->rmap->range; 2382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2383 Mat A = aij->A,B=aij->B,C=*matredundant; 2384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2385 PetscScalar *sbuf_a; 2386 PetscInt nzlocal=a->nz+b->nz; 2387 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2388 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2389 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2390 MatScalar *aworkA,*aworkB; 2391 PetscScalar *vals; 2392 PetscMPIInt tag1,tag2,tag3,imdex; 2393 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2394 
MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2395 MPI_Status recv_status,*send_status; 2396 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2397 PetscInt **rbuf_j=NULL; 2398 PetscScalar **rbuf_a=NULL; 2399 Mat_Redundant *redund =NULL; 2400 2401 PetscFunctionBegin; 2402 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2403 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2404 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2405 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2406 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2407 2408 if (reuse == MAT_REUSE_MATRIX) { 2409 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2410 if (subsize == 1) { 2411 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2412 redund = c->redundant; 2413 } else { 2414 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2415 redund = c->redundant; 2416 } 2417 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2418 2419 nsends = redund->nsends; 2420 nrecvs = redund->nrecvs; 2421 send_rank = redund->send_rank; 2422 recv_rank = redund->recv_rank; 2423 sbuf_nz = redund->sbuf_nz; 2424 rbuf_nz = redund->rbuf_nz; 2425 sbuf_j = redund->sbuf_j; 2426 sbuf_a = redund->sbuf_a; 2427 rbuf_j = redund->rbuf_j; 2428 rbuf_a = redund->rbuf_a; 2429 } 2430 2431 if (reuse == MAT_INITIAL_MATRIX) { 2432 PetscInt nleftover,np_subcomm; 2433 2434 /* get the destination processors' id send_rank, nsends and nrecvs */ 2435 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2436 2437 np_subcomm = size/nsubcomm; 2438 nleftover = size - nsubcomm*np_subcomm; 2439 2440 /* block of codes below is specific for INTERLACED */ 2441 /* ------------------------------------------------*/ 2442 nsends = 0; nrecvs = 0; 2443 for (i=0; i<size; i++) { 2444 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2445 send_rank[nsends++] = i; 2446 recv_rank[nrecvs++] = i; 2447 } 2448 } 2449 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2450 i = size-nleftover-1; 2451 j = 0; 2452 while (j < nsubcomm - nleftover) { 2453 send_rank[nsends++] = i; 2454 i--; j++; 2455 } 2456 } 2457 2458 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2459 for (i=0; i<nleftover; i++) { 2460 recv_rank[nrecvs++] = size-nleftover+i; 2461 } 2462 } 2463 /*----------------------------------------------*/ 2464 2465 /* allocate sbuf_j, sbuf_a */ 2466 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2467 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2468 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2469 /* 2470 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2471 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2472 */ 2473 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2474 2475 /* copy mat's local entries into the buffers */ 2476 if (reuse == MAT_INITIAL_MATRIX) { 2477 rownz_max = 0; 2478 rptr = sbuf_j; 2479 cols = sbuf_j + rend-rstart + 1; 2480 vals = sbuf_a; 2481 rptr[0] = 0; 2482 for (i=0; i<rend-rstart; i++) { 2483 row = i + rstart; 2484 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2485 ncols = nzA + nzB; 2486 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2487 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2488 /* load the column indices for this row into cols */ 2489 lwrite = 0; 2490 for (l=0; l<nzB; l++) { 2491 
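       /* first pass: off-diagonal (B) entries whose global column, obtained by
          translating the compressed local index through bmap (= aij->garray), lies
          before the diagonal block; the second and third passes below emit the
          diagonal-block entries (local index shifted by cstart) and the remaining
          off-diagonal entries, so each packed row ends up in increasing global
          column order */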
if ((ctmp = bmap[cworkB[l]]) < cstart) { 2492 vals[lwrite] = aworkB[l]; 2493 cols[lwrite++] = ctmp; 2494 } 2495 } 2496 for (l=0; l<nzA; l++) { 2497 vals[lwrite] = aworkA[l]; 2498 cols[lwrite++] = cstart + cworkA[l]; 2499 } 2500 for (l=0; l<nzB; l++) { 2501 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2502 vals[lwrite] = aworkB[l]; 2503 cols[lwrite++] = ctmp; 2504 } 2505 } 2506 vals += ncols; 2507 cols += ncols; 2508 rptr[i+1] = rptr[i] + ncols; 2509 if (rownz_max < ncols) rownz_max = ncols; 2510 } 2511 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2512 } else { /* only copy matrix values into sbuf_a */ 2513 rptr = sbuf_j; 2514 vals = sbuf_a; 2515 rptr[0] = 0; 2516 for (i=0; i<rend-rstart; i++) { 2517 row = i + rstart; 2518 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2519 ncols = nzA + nzB; 2520 cworkB = b->j + b->i[i]; 2521 aworkA = a->a + a->i[i]; 2522 aworkB = b->a + b->i[i]; 2523 lwrite = 0; 2524 for (l=0; l<nzB; l++) { 2525 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2526 } 2527 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2528 for (l=0; l<nzB; l++) { 2529 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2530 } 2531 vals += ncols; 2532 rptr[i+1] = rptr[i] + ncols; 2533 } 2534 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2535 2536 /* send nzlocal to others, and recv other's nzlocal */ 2537 /*--------------------------------------------------*/ 2538 if (reuse == MAT_INITIAL_MATRIX) { 2539 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2540 2541 s_waits2 = s_waits3 + nsends; 2542 s_waits1 = s_waits2 + nsends; 2543 r_waits1 = s_waits1 + nsends; 2544 r_waits2 = r_waits1 + nrecvs; 2545 r_waits3 = r_waits2 + nrecvs; 2546 } else { 2547 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2548 2549 r_waits3 = s_waits3 + nsends; 2550 } 2551 2552 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2553 if (reuse == MAT_INITIAL_MATRIX) { 2554 /* get new tags to keep the communication clean */ 2555 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2556 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2557 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2558 2559 /* post receives of other's nzlocal */ 2560 for (i=0; i<nrecvs; i++) { 2561 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2562 } 2563 /* send nzlocal to others */ 2564 for (i=0; i<nsends; i++) { 2565 sbuf_nz[i] = nzlocal; 2566 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2567 } 2568 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2569 count = nrecvs; 2570 while (count) { 2571 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2572 2573 recv_rank[imdex] = recv_status.MPI_SOURCE; 2574 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2575 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2576 2577 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2578 2579 rbuf_nz[imdex] += i + 2; 2580 2581 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2582 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2583 count--; 2584 } 
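    /*
       Communication protocol: tag1 carries each sender's nzlocal so the receiver can
       size rbuf_j/rbuf_a, tag2 carries the packed row pointers plus column indices
       (sbuf_j), and tag3 (posted further below) carries the numerical values. On
       MAT_REUSE_MATRIX only the tag3 exchange of values is repeated; the index
       buffers from the first call are kept in the Mat_Redundant context.
    */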
2585 /* wait on sends of nzlocal */ 2586 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2587 /* send mat->i,j to others, and recv from other's */ 2588 /*------------------------------------------------*/ 2589 for (i=0; i<nsends; i++) { 2590 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2591 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2592 } 2593 /* wait on receives of mat->i,j */ 2594 /*------------------------------*/ 2595 count = nrecvs; 2596 while (count) { 2597 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2598 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2599 count--; 2600 } 2601 /* wait on sends of mat->i,j */ 2602 /*---------------------------*/ 2603 if (nsends) { 2604 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2605 } 2606 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2607 2608 /* post receives, send and receive mat->a */ 2609 /*----------------------------------------*/ 2610 for (imdex=0; imdex<nrecvs; imdex++) { 2611 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2612 } 2613 for (i=0; i<nsends; i++) { 2614 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2615 } 2616 count = nrecvs; 2617 while (count) { 2618 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2619 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2620 count--; 2621 } 2622 if (nsends) { 2623 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2624 } 2625 2626 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2627 2628 /* create redundant matrix */ 2629 /*-------------------------*/ 2630 if (reuse == MAT_INITIAL_MATRIX) { 2631 const PetscInt *range; 2632 PetscInt rstart_sub,rend_sub,mloc_sub; 2633 2634 /* compute rownz_max for preallocation */ 2635 for (imdex=0; imdex<nrecvs; imdex++) { 2636 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2637 rptr = rbuf_j[imdex]; 2638 for (i=0; i<j; i++) { 2639 ncols = rptr[i+1] - rptr[i]; 2640 if (rownz_max < ncols) rownz_max = ncols; 2641 } 2642 } 2643 2644 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2645 2646 /* get local size of redundant matrix 2647 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
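        For the interlaced layout, process subrank of each subcommunicator takes the
        rows originally owned by global ranks nsubcomm*subrank up to
        nsubcomm*(subrank+1)-1 (or up to the end of the matrix for the last process of
        the subcommunicator), which is what the range[] arithmetic below implements.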
*/ 2648 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2649 rstart_sub = range[nsubcomm*subrank]; 2650 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2651 rend_sub = range[nsubcomm*(subrank+1)]; 2652 } else { 2653 rend_sub = mat->rmap->N; 2654 } 2655 mloc_sub = rend_sub - rstart_sub; 2656 2657 if (M == N) { 2658 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2659 } else { /* non-square matrix */ 2660 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2661 } 2662 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2663 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2664 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2665 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2666 } else { 2667 C = *matredundant; 2668 } 2669 2670 /* insert local matrix entries */ 2671 rptr = sbuf_j; 2672 cols = sbuf_j + rend-rstart + 1; 2673 vals = sbuf_a; 2674 for (i=0; i<rend-rstart; i++) { 2675 row = i + rstart; 2676 ncols = rptr[i+1] - rptr[i]; 2677 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2678 vals += ncols; 2679 cols += ncols; 2680 } 2681 /* insert received matrix entries */ 2682 for (imdex=0; imdex<nrecvs; imdex++) { 2683 rstart = rowrange[recv_rank[imdex]]; 2684 rend = rowrange[recv_rank[imdex]+1]; 2685 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2686 rptr = rbuf_j[imdex]; 2687 cols = rbuf_j[imdex] + rend-rstart + 1; 2688 vals = rbuf_a[imdex]; 2689 for (i=0; i<rend-rstart; i++) { 2690 row = i + rstart; 2691 ncols = rptr[i+1] - rptr[i]; 2692 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2693 vals += ncols; 2694 cols += ncols; 2695 } 2696 } 2697 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2698 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2699 2700 if (reuse == MAT_INITIAL_MATRIX) { 2701 *matredundant = C; 2702 2703 /* create a supporting struct and attach it to C for reuse */ 2704 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2705 if (subsize == 1) { 2706 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2707 c->redundant = redund; 2708 } else { 2709 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2710 c->redundant = redund; 2711 } 2712 2713 redund->nzlocal = nzlocal; 2714 redund->nsends = nsends; 2715 redund->nrecvs = nrecvs; 2716 redund->send_rank = send_rank; 2717 redund->recv_rank = recv_rank; 2718 redund->sbuf_nz = sbuf_nz; 2719 redund->rbuf_nz = rbuf_nz; 2720 redund->sbuf_j = sbuf_j; 2721 redund->sbuf_a = sbuf_a; 2722 redund->rbuf_j = rbuf_j; 2723 redund->rbuf_a = rbuf_a; 2724 redund->psubcomm = NULL; 2725 } 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2731 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2732 { 2733 PetscErrorCode ierr; 2734 MPI_Comm comm; 2735 PetscMPIInt size,subsize; 2736 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2737 Mat_Redundant *redund=NULL; 2738 PetscSubcomm psubcomm=NULL; 2739 MPI_Comm subcomm_in=subcomm; 2740 Mat *matseq; 2741 IS isrow,iscol; 2742 2743 PetscFunctionBegin; 2744 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2745 if (reuse == MAT_INITIAL_MATRIX) { 2746 /* create psubcomm, then get subcomm */ 2747 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2749 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2750 2751 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2752 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2753 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2754 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2755 subcomm = psubcomm->comm; 2756 } else { /* retrieve psubcomm and subcomm */ 2757 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2758 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2759 if (subsize == 1) { 2760 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2761 redund = c->redundant; 2762 } else { 2763 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2764 redund = c->redundant; 2765 } 2766 psubcomm = redund->psubcomm; 2767 } 2768 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2769 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2770 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2771 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2772 if (subsize == 1) { 2773 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2774 c->redundant->psubcomm = psubcomm; 2775 } else { 2776 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2777 c->redundant->psubcomm = psubcomm ; 2778 } 2779 } 2780 PetscFunctionReturn(0); 2781 } 2782 } 2783 2784 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2785 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2786 if (reuse == MAT_INITIAL_MATRIX) { 2787 /* create a local sequential matrix matseq[0] */ 2788 mloc_sub = PETSC_DECIDE; 2789 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2790 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2791 rstart = rend - mloc_sub; 2792 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2793 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2794 } else { /* reuse == MAT_REUSE_MATRIX */ 2795 if (subsize == 1) { 2796 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2797 redund = c->redundant; 2798 } else { 2799 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2800 redund = c->redundant; 2801 } 2802 2803 isrow = redund->isrow; 2804 iscol = redund->iscol; 2805 matseq = redund->matseq; 2806 } 2807 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2808 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2809 2810 if (reuse == MAT_INITIAL_MATRIX) { 2811 /* create a supporting struct and attach it to C for reuse */ 2812 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2813 if (subsize == 1) { 2814 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2815 c->redundant = redund; 2816 } else { 2817 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2818 c->redundant = redund; 2819 } 2820 redund->isrow = isrow; 2821 redund->iscol = iscol; 2822 redund->matseq = matseq; 2823 redund->psubcomm = psubcomm; 2824 } 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2830 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2831 { 2832 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2833 PetscErrorCode ierr; 2834 PetscInt i,*idxb = 0; 2835 PetscScalar *va,*vb; 2836 Vec vtmp; 2837 2838 
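  /*
     The row-wise maximum in absolute value is obtained by computing it for the
     diagonal block directly into v (shifting the returned column indices of nonempty
     rows by cmap->rstart to make them global), repeating the computation for the
     off-diagonal block into the temporary sequential vector vtmp, and then merging
     the two: wherever the off-diagonal entry is larger its value replaces the
     diagonal one and the compressed column index is translated to a global index
     through a->garray.
  */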
PetscFunctionBegin;
2839 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2840 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2841 if (idx) {
2842 for (i=0; i<A->rmap->n; i++) {  /* loop over the local rows */
2843 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844 }
2845 }
2846
2847 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2848 if (idx) {
2849 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2850 }
2851 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2852 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2853
2854 for (i=0; i<A->rmap->n; i++) {
2855 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856 va[i] = vb[i];
2857 if (idx) idx[i] = a->garray[idxb[i]];
2858 }
2859 }
2860
2861 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2862 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2863 ierr = PetscFree(idxb);CHKERRQ(ierr);
2864 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2865 PetscFunctionReturn(0);
2866 }
2867
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2873 PetscErrorCode ierr;
2874 PetscInt i,*idxb = 0;
2875 PetscScalar *va,*vb;
2876 Vec vtmp;
2877
2878 PetscFunctionBegin;
2879 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2880 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881 if (idx) {
2882 for (i=0; i<A->rmap->n; i++) {  /* loop over the local rows */
2883 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884 }
2885 }
2886
2887 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888 if (idx) {
2889 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890 }
2891 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893
2894 for (i=0; i<A->rmap->n; i++) {
2895 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896 va[i] = vb[i];
2897 if (idx) idx[i] = a->garray[idxb[i]];
2898 }
2899 }
2900
2901 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903 ierr = PetscFree(idxb);CHKERRQ(ierr);
2904 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905 PetscFunctionReturn(0);
2906 }
2907
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2913 PetscInt n = A->rmap->n;
2914 PetscInt cstart = A->cmap->rstart;
2915 PetscInt *cmap = mat->garray;
2916 PetscInt *diagIdx, *offdiagIdx;
2917 Vec diagV, offdiagV;
2918 PetscScalar *a, *diagA, *offdiagA;
2919 PetscInt r;
2920 PetscErrorCode ierr;
2921
2922 PetscFunctionBegin;
2923 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2924 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);  /* work vectors are purely local to this process */
2925 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2926 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2927 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2928 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2929 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2930 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931 for (r = 0; r < n; ++r) {
2932 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933 a[r] = diagA[r];
2934 idx[r] = cstart + diagIdx[r];
2935 } else {
2936 a[r] = offdiagA[r];
2937 idx[r] = cmap[offdiagIdx[r]];
2938 }
2939 }
2940 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2941 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2942 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2943 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2944 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2945 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2952 { 2953 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2954 PetscInt n = A->rmap->n; 2955 PetscInt cstart = A->cmap->rstart; 2956 PetscInt *cmap = mat->garray; 2957 PetscInt *diagIdx, *offdiagIdx; 2958 Vec diagV, offdiagV; 2959 PetscScalar *a, *diagA, *offdiagA; 2960 PetscInt r; 2961 PetscErrorCode ierr; 2962 2963 PetscFunctionBegin; 2964 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2965 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2966 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2967 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2968 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2969 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2970 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2971 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2972 for (r = 0; r < n; ++r) { 2973 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2974 a[r] = diagA[r]; 2975 idx[r] = cstart + diagIdx[r]; 2976 } else { 2977 a[r] = offdiagA[r]; 2978 idx[r] = cmap[offdiagIdx[r]]; 2979 } 2980 } 2981 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2985 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2986 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2993 { 2994 PetscErrorCode ierr; 2995 Mat *dummy; 2996 2997 PetscFunctionBegin; 2998 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2999 *newmat = *dummy; 3000 ierr = PetscFree(dummy);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3006 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3007 { 3008 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3018 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3019 { 3020 PetscErrorCode ierr; 3021 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3022 3023 PetscFunctionBegin; 3024 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3025 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3026 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3027 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 /* -------------------------------------------------------------------*/ 3032 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3033 MatGetRow_MPIAIJ, 3034 MatRestoreRow_MPIAIJ, 3035 MatMult_MPIAIJ, 3036 /* 4*/ MatMultAdd_MPIAIJ, 3037 MatMultTranspose_MPIAIJ, 3038 MatMultTransposeAdd_MPIAIJ, 3039 #if defined(PETSC_HAVE_PBGL) 3040 MatSolve_MPIAIJ, 3041 #else 3042 0, 3043 #endif 3044 0, 3045 
0, 3046 /*10*/ 0, 3047 0, 3048 0, 3049 MatSOR_MPIAIJ, 3050 MatTranspose_MPIAIJ, 3051 /*15*/ MatGetInfo_MPIAIJ, 3052 MatEqual_MPIAIJ, 3053 MatGetDiagonal_MPIAIJ, 3054 MatDiagonalScale_MPIAIJ, 3055 MatNorm_MPIAIJ, 3056 /*20*/ MatAssemblyBegin_MPIAIJ, 3057 MatAssemblyEnd_MPIAIJ, 3058 MatSetOption_MPIAIJ, 3059 MatZeroEntries_MPIAIJ, 3060 /*24*/ MatZeroRows_MPIAIJ, 3061 0, 3062 #if defined(PETSC_HAVE_PBGL) 3063 0, 3064 #else 3065 0, 3066 #endif 3067 0, 3068 0, 3069 /*29*/ MatSetUp_MPIAIJ, 3070 #if defined(PETSC_HAVE_PBGL) 3071 0, 3072 #else 3073 0, 3074 #endif 3075 0, 3076 0, 3077 0, 3078 /*34*/ MatDuplicate_MPIAIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*39*/ MatAXPY_MPIAIJ, 3084 MatGetSubMatrices_MPIAIJ, 3085 MatIncreaseOverlap_MPIAIJ, 3086 MatGetValues_MPIAIJ, 3087 MatCopy_MPIAIJ, 3088 /*44*/ MatGetRowMax_MPIAIJ, 3089 MatScale_MPIAIJ, 3090 0, 3091 0, 3092 MatZeroRowsColumns_MPIAIJ, 3093 /*49*/ MatSetRandom_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*54*/ MatFDColoringCreate_MPIXAIJ, 3099 0, 3100 MatSetUnfactored_MPIAIJ, 3101 MatPermute_MPIAIJ, 3102 0, 3103 /*59*/ MatGetSubMatrix_MPIAIJ, 3104 MatDestroy_MPIAIJ, 3105 MatView_MPIAIJ, 3106 0, 3107 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3108 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3109 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3110 0, 3111 0, 3112 0, 3113 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3114 MatGetRowMinAbs_MPIAIJ, 3115 0, 3116 MatSetColoring_MPIAIJ, 3117 0, 3118 MatSetValuesAdifor_MPIAIJ, 3119 /*75*/ MatFDColoringApply_AIJ, 3120 0, 3121 0, 3122 0, 3123 MatFindZeroDiagonals_MPIAIJ, 3124 /*80*/ 0, 3125 0, 3126 0, 3127 /*83*/ MatLoad_MPIAIJ, 3128 0, 3129 0, 3130 0, 3131 0, 3132 0, 3133 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3134 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3136 MatPtAP_MPIAIJ_MPIAIJ, 3137 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3138 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3139 0, 3140 0, 3141 0, 3142 0, 3143 /*99*/ 0, 3144 0, 3145 0, 3146 MatConjugate_MPIAIJ, 3147 0, 3148 /*104*/MatSetValuesRow_MPIAIJ, 3149 MatRealPart_MPIAIJ, 3150 MatImaginaryPart_MPIAIJ, 3151 0, 3152 0, 3153 /*109*/0, 3154 MatGetRedundantMatrix_MPIAIJ, 3155 MatGetRowMin_MPIAIJ, 3156 0, 3157 0, 3158 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3159 0, 3160 0, 3161 0, 3162 0, 3163 /*119*/0, 3164 0, 3165 0, 3166 0, 3167 MatGetMultiProcBlock_MPIAIJ, 3168 /*124*/MatFindNonzeroRows_MPIAIJ, 3169 MatGetColumnNorms_MPIAIJ, 3170 MatInvertBlockDiagonal_MPIAIJ, 3171 0, 3172 MatGetSubMatricesParallel_MPIAIJ, 3173 /*129*/0, 3174 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3175 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3176 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3177 0, 3178 /*134*/0, 3179 0, 3180 0, 3181 0, 3182 0, 3183 /*139*/0, 3184 0, 3185 0, 3186 MatFDColoringSetUp_MPIXAIJ 3187 }; 3188 3189 /* ----------------------------------------------------------------------------------------*/ 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3193 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3194 { 3195 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3200 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3206 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3207 { 3208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3209 PetscErrorCode ierr; 3210 3211 PetscFunctionBegin; 3212 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3213 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3214 PetscFunctionReturn(0); 3215 } 3216 3217 #undef __FUNCT__ 3218 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3219 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3220 { 3221 Mat_MPIAIJ *b; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3226 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3227 b = (Mat_MPIAIJ*)B->data; 3228 3229 if (!B->preallocated) { 3230 /* Explicitly create 2 MATSEQAIJ matrices. */ 3231 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3232 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3233 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3234 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3235 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3236 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3237 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3238 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3239 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3240 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3241 } 3242 3243 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3244 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3245 B->preallocated = PETSC_TRUE; 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3251 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3252 { 3253 Mat mat; 3254 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 *newmat = 0; 3259 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3260 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3261 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3262 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3263 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3264 a = (Mat_MPIAIJ*)mat->data; 3265 3266 mat->factortype = matin->factortype; 3267 mat->assembled = PETSC_TRUE; 3268 mat->insertmode = NOT_SET_VALUES; 3269 mat->preallocated = PETSC_TRUE; 3270 3271 a->size = oldmat->size; 3272 a->rank = oldmat->rank; 3273 a->donotstash = oldmat->donotstash; 3274 a->roworiented = oldmat->roworiented; 3275 a->rowindices = 0; 3276 a->rowvalues = 0; 3277 a->getrowactive = PETSC_FALSE; 3278 3279 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3280 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3281 3282 if (oldmat->colmap) { 3283 #if defined(PETSC_USE_CTABLE) 3284 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3285 #else 3286 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3287 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3288 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3289 #endif 3290 } else a->colmap = 0; 3291 if (oldmat->garray) { 3292 PetscInt len; 3293 len = oldmat->B->cmap->n; 3294 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3295 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3296 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3297 } else a->garray = 0; 3298 3299 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3300 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3301 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3302 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3303 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3304 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3305 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3306 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3307 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3308 *newmat = mat; 3309 PetscFunctionReturn(0); 3310 } 3311 3312 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatLoad_MPIAIJ" 3316 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3317 { 3318 PetscScalar *vals,*svals; 3319 MPI_Comm comm; 3320 PetscErrorCode ierr; 3321 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3322 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 3323 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3324 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3325 PetscInt cend,cstart,n,*rowners; 3326 int fd; 3327 PetscInt bs = newMat->rmap->bs; 3328 3329 PetscFunctionBegin; 3330 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3332 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3333 if (!rank) { 3334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3335 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3336 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3337 } 3338 3339 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3340 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3341 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3342 if (bs < 0) bs = 1; 3343 3344 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3345 M = header[1]; N = header[2]; 3346 3347 /* If global sizes are set, check if they are consistent with that given in the file */ 3348 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 3349 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 3350 3351 /* determine ownership of all (block) rows */ 3352 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3353 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3354 else m = newMat->rmap->n; /* Set by user */ 3355 3356 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3357 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3358 3359 /* First process needs enough room for process with most rows */ 3360 if (!rank) { 3361 mmax = rowners[1]; 3362 for (i=2; i<=size; i++) { 3363 mmax = PetscMax(mmax, rowners[i]); 3364 } 3365 } else mmax = -1; /* unused, but 
compilers complain */ 3366 3367 rowners[0] = 0; 3368 for (i=2; i<=size; i++) { 3369 rowners[i] += rowners[i-1]; 3370 } 3371 rstart = rowners[rank]; 3372 rend = rowners[rank+1]; 3373 3374 /* distribute row lengths to all processors */ 3375 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3376 if (!rank) { 3377 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3378 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3379 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3380 for (j=0; j<m; j++) { 3381 procsnz[0] += ourlens[j]; 3382 } 3383 for (i=1; i<size; i++) { 3384 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3385 /* calculate the number of nonzeros on each processor */ 3386 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3387 procsnz[i] += rowlengths[j]; 3388 } 3389 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3390 } 3391 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3392 } else { 3393 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3394 } 3395 3396 if (!rank) { 3397 /* determine max buffer needed and allocate it */ 3398 maxnz = 0; 3399 for (i=0; i<size; i++) { 3400 maxnz = PetscMax(maxnz,procsnz[i]); 3401 } 3402 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3403 3404 /* read in my part of the matrix column indices */ 3405 nz = procsnz[0]; 3406 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3407 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3408 3409 /* read in every one elses and ship off */ 3410 for (i=1; i<size; i++) { 3411 nz = procsnz[i]; 3412 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3413 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3414 } 3415 ierr = PetscFree(cols);CHKERRQ(ierr); 3416 } else { 3417 /* determine buffer space needed for message */ 3418 nz = 0; 3419 for (i=0; i<m; i++) { 3420 nz += ourlens[i]; 3421 } 3422 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3423 3424 /* receive message of column indices*/ 3425 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3426 } 3427 3428 /* determine column ownership if matrix is not square */ 3429 if (N != M) { 3430 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3431 else n = newMat->cmap->n; 3432 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3433 cstart = cend - n; 3434 } else { 3435 cstart = rstart; 3436 cend = rend; 3437 n = cend - cstart; 3438 } 3439 3440 /* loop over local rows, determining number of off diagonal entries */ 3441 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3442 jj = 0; 3443 for (i=0; i<m; i++) { 3444 for (j=0; j<ourlens[i]; j++) { 3445 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3446 jj++; 3447 } 3448 } 3449 3450 for (i=0; i<m; i++) { 3451 ourlens[i] -= offlens[i]; 3452 } 3453 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3454 3455 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3456 3457 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3458 3459 for (i=0; i<m; i++) { 3460 ourlens[i] += offlens[i]; 3461 } 3462 3463 if (!rank) { 3464 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3465 3466 /* read in my part of the matrix numerical values */ 3467 nz = procsnz[0]; 3468 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3469 3470 /* insert into matrix */ 3471 jj = rstart; 3472 smycols = mycols; 3473 svals = vals; 3474 for (i=0; i<m; i++) { 3475 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3476 smycols += ourlens[i]; 3477 svals += ourlens[i]; 3478 jj++; 3479 } 3480 3481 /* read in other processors and ship out */ 3482 for (i=1; i<size; i++) { 3483 nz = procsnz[i]; 3484 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3485 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3486 } 3487 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3488 } else { 3489 /* receive numeric values */ 3490 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3491 3492 /* receive message of values*/ 3493 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3494 3495 /* insert into matrix */ 3496 jj = rstart; 3497 smycols = mycols; 3498 svals = vals; 3499 for (i=0; i<m; i++) { 3500 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3501 smycols += ourlens[i]; 3502 svals += ourlens[i]; 3503 jj++; 3504 } 3505 } 3506 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3507 ierr = PetscFree(vals);CHKERRQ(ierr); 3508 ierr = PetscFree(mycols);CHKERRQ(ierr); 3509 ierr = PetscFree(rowners);CHKERRQ(ierr); 3510 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3511 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3512 PetscFunctionReturn(0); 3513 } 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3517 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3518 { 3519 PetscErrorCode ierr; 3520 IS iscol_local; 3521 PetscInt csize; 3522 3523 PetscFunctionBegin; 3524 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3525 if (call == MAT_REUSE_MATRIX) { 3526 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3527 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3528 } else { 3529 PetscInt cbs; 3530 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3531 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3532 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3533 } 3534 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3535 if (call == MAT_INITIAL_MATRIX) { 3536 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3537 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3538 } 3539 PetscFunctionReturn(0); 3540 } 3541 3542 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3545 /* 3546 Not great since it makes two copies of the submatrix, first an SeqAIJ 3547 in local and then by concatenating the local matrices the end result. 3548 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3549 3550 Note: This requires a sequential iscol with all indices. 
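
    A minimal sketch of the expected calling pattern (modelled on MatGetSubMatrix_MPIAIJ()
    above, which gathers the column IS onto every process before invoking this private
    routine):

       ISAllGather(iscol,&iscol_local);
       MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
       ISDestroy(&iscol_local);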
3551 */ 3552 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3553 { 3554 PetscErrorCode ierr; 3555 PetscMPIInt rank,size; 3556 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3557 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3558 PetscBool allcolumns, colflag; 3559 Mat M,Mreuse; 3560 MatScalar *vwork,*aa; 3561 MPI_Comm comm; 3562 Mat_SeqAIJ *aij; 3563 3564 PetscFunctionBegin; 3565 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3566 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3567 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3568 3569 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3570 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3571 if (colflag && ncol == mat->cmap->N) { 3572 allcolumns = PETSC_TRUE; 3573 } else { 3574 allcolumns = PETSC_FALSE; 3575 } 3576 if (call == MAT_REUSE_MATRIX) { 3577 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3578 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3579 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3580 } else { 3581 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3582 } 3583 3584 /* 3585 m - number of local rows 3586 n - number of columns (same on all processors) 3587 rstart - first row in new global matrix generated 3588 */ 3589 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3590 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3591 if (call == MAT_INITIAL_MATRIX) { 3592 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3593 ii = aij->i; 3594 jj = aij->j; 3595 3596 /* 3597 Determine the number of non-zeros in the diagonal and off-diagonal 3598 portions of the matrix in order to do correct preallocation 3599 */ 3600 3601 /* first get start and end of "diagonal" columns */ 3602 if (csize == PETSC_DECIDE) { 3603 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3604 if (mglobal == n) { /* square matrix */ 3605 nlocal = m; 3606 } else { 3607 nlocal = n/size + ((n % size) > rank); 3608 } 3609 } else { 3610 nlocal = csize; 3611 } 3612 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3613 rstart = rend - nlocal; 3614 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3615 3616 /* next, compute all the lengths */ 3617 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3618 olens = dlens + m; 3619 for (i=0; i<m; i++) { 3620 jend = ii[i+1] - ii[i]; 3621 olen = 0; 3622 dlen = 0; 3623 for (j=0; j<jend; j++) { 3624 if (*jj < rstart || *jj >= rend) olen++; 3625 else dlen++; 3626 jj++; 3627 } 3628 olens[i] = olen; 3629 dlens[i] = dlen; 3630 } 3631 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3632 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3633 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3634 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3635 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3636 ierr = PetscFree(dlens);CHKERRQ(ierr); 3637 } else { 3638 PetscInt ml,nl; 3639 3640 M = *newmat; 3641 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3642 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3643 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3644 /* 3645 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3646 rather than the slower MatSetValues().
3647 */
3648 M->was_assembled = PETSC_TRUE;
3649 M->assembled = PETSC_FALSE;
3650 }
3651 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3652 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3653 ii = aij->i;
3654 jj = aij->j;
3655 aa = aij->a;
3656 for (i=0; i<m; i++) {
3657 row = rstart + i;
3658 nz = ii[i+1] - ii[i];
3659 cwork = jj; jj += nz;
3660 vwork = aa; aa += nz;
3661 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3662 }
3663
3664 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3665 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3666 *newmat = M;
3667
3668 /* save submatrix used in processor for next request */
3669 if (call == MAT_INITIAL_MATRIX) {
3670 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3671 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3672 }
3673 PetscFunctionReturn(0);
3674 }
3675
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3678 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3679 {
3680 PetscInt m,cstart, cend,j,nnz,i,d;
3681 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3682 const PetscInt *JJ;
3683 PetscScalar *values;
3684 PetscErrorCode ierr;
3685
3686 PetscFunctionBegin;
3687 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3688
3689 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3690 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3691 m = B->rmap->n;
3692 cstart = B->cmap->rstart;
3693 cend = B->cmap->rend;
3694 rstart = B->rmap->rstart;
3695
3696 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3697
3698 #if defined(PETSC_USE_DEBUG)
3699 for (i=0; i<m; i++) {
3700 nnz = Ii[i+1]- Ii[i];
3701 JJ = J + Ii[i];
3702 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3703 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3704 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3705 }
3706 #endif
3707
3708 for (i=0; i<m; i++) {
3709 nnz = Ii[i+1]- Ii[i];
3710 JJ = J + Ii[i];
3711 nnz_max = PetscMax(nnz_max,nnz);
3712 d = 0;
3713 for (j=0; j<nnz; j++) {
3714 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3715 }
3716 d_nnz[i] = d;
3717 o_nnz[i] = nnz - d;
3718 }
3719 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3720 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3721
3722 if (v) values = (PetscScalar*)v;
3723 else {
3724 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3725 }
3726
3727 for (i=0; i<m; i++) {
3728 ii = i + rstart;
3729 nnz = Ii[i+1]- Ii[i];
3730 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3731 } 3732 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 3735 if (!v) { 3736 ierr = PetscFree(values);CHKERRQ(ierr); 3737 } 3738 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3739 PetscFunctionReturn(0); 3740 } 3741 3742 #undef __FUNCT__ 3743 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3744 /*@ 3745 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3746 (the default parallel PETSc format). 3747 3748 Collective on MPI_Comm 3749 3750 Input Parameters: 3751 + B - the matrix 3752 . i - the indices into j for the start of each local row (starts with zero) 3753 . j - the column indices for each local row (starts with zero) 3754 - v - optional values in the matrix 3755 3756 Level: developer 3757 3758 Notes: 3759 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3760 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3761 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3762 3763 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3764 3765 The format which is used for the sparse matrix input, is equivalent to a 3766 row-major ordering.. i.e for the following matrix, the input data expected is 3767 as shown: 3768 3769 1 0 0 3770 2 0 3 P0 3771 ------- 3772 4 5 6 P1 3773 3774 Process0 [P0]: rows_owned=[0,1] 3775 i = {0,1,3} [size = nrow+1 = 2+1] 3776 j = {0,0,2} [size = nz = 6] 3777 v = {1,2,3} [size = nz = 6] 3778 3779 Process1 [P1]: rows_owned=[2] 3780 i = {0,3} [size = nrow+1 = 1+1] 3781 j = {0,1,2} [size = nz = 6] 3782 v = {4,5,6} [size = nz = 6] 3783 3784 .keywords: matrix, aij, compressed row, sparse, parallel 3785 3786 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3787 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3788 @*/ 3789 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3790 { 3791 PetscErrorCode ierr; 3792 3793 PetscFunctionBegin; 3794 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3795 PetscFunctionReturn(0); 3796 } 3797 3798 #undef __FUNCT__ 3799 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3800 /*@C 3801 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3802 (the default parallel PETSc format). For good matrix assembly performance 3803 the user should preallocate the matrix storage by setting the parameters 3804 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3805 performance can be increased by more than a factor of 50. 3806 3807 Collective on MPI_Comm 3808 3809 Input Parameters: 3810 + B - the matrix 3811 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3812 (same value is used for all local rows) 3813 . d_nnz - array containing the number of nonzeros in the various rows of the 3814 DIAGONAL portion of the local submatrix (possibly different for each row) 3815 or NULL, if d_nz is used to specify the nonzero structure. 3816 The size of this array is equal to the number of local rows, i.e 'm'. 
3817 For matrices that will be factored, you must leave room for (and set) 3818 the diagonal entry even if it is zero. 3819 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3820 submatrix (same value is used for all local rows). 3821 - o_nnz - array containing the number of nonzeros in the various rows of the 3822 OFF-DIAGONAL portion of the local submatrix (possibly different for 3823 each row) or NULL, if o_nz is used to specify the nonzero 3824 structure. The size of this array is equal to the number 3825 of local rows, i.e 'm'. 3826 3827 If the *_nnz parameter is given then the *_nz parameter is ignored 3828 3829 The AIJ format (also called the Yale sparse matrix format or 3830 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3831 storage. The stored row and column indices begin with zero. 3832 See Users-Manual: ch_mat for details. 3833 3834 The parallel matrix is partitioned such that the first m0 rows belong to 3835 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3836 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3837 3838 The DIAGONAL portion of the local submatrix of a processor can be defined 3839 as the submatrix which is obtained by extraction the part corresponding to 3840 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3841 first row that belongs to the processor, r2 is the last row belonging to 3842 the this processor, and c1-c2 is range of indices of the local part of a 3843 vector suitable for applying the matrix to. This is an mxn matrix. In the 3844 common case of a square matrix, the row and column ranges are the same and 3845 the DIAGONAL part is also square. The remaining portion of the local 3846 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3847 3848 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3849 3850 You can call MatGetInfo() to get information on how effective the preallocation was; 3851 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3852 You can also run with the option -info and look for messages with the string 3853 malloc in them to see if additional memory allocation was needed. 3854 3855 Example usage: 3856 3857 Consider the following 8x8 matrix with 34 non-zero values, that is 3858 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3859 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3860 as follows: 3861 3862 .vb 3863 1 2 0 | 0 3 0 | 0 4 3864 Proc0 0 5 6 | 7 0 0 | 8 0 3865 9 0 10 | 11 0 0 | 12 0 3866 ------------------------------------- 3867 13 0 14 | 15 16 17 | 0 0 3868 Proc1 0 18 0 | 19 20 21 | 0 0 3869 0 0 0 | 22 23 0 | 24 0 3870 ------------------------------------- 3871 Proc2 25 26 27 | 0 0 28 | 29 0 3872 30 0 0 | 31 32 33 | 0 34 3873 .ve 3874 3875 This can be represented as a collection of submatrices as: 3876 3877 .vb 3878 A B C 3879 D E F 3880 G H I 3881 .ve 3882 3883 Where the submatrices A,B,C are owned by proc0, D,E,F are 3884 owned by proc1, G,H,I are owned by proc2. 3885 3886 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3887 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3888 The 'M','N' parameters are 8,8, and have the same values on all procs. 3889 3890 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3891 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3892 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
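
   A typical creation sequence that supplies this preallocation information is sketched
   below (a minimal outline only; error checking is omitted, and A, comm, m, n, M, N, d_nnz
   and o_nnz stand for the quantities described in this page):

.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve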
3893 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3894 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3895 matrix and [DF] as another SeqAIJ matrix.
3896
3897 When d_nz, o_nz parameters are specified, d_nz storage elements are
3898 allocated for every row of the local diagonal submatrix, and o_nz
3899 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3900 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3901 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3902 In this case, the values of d_nz,o_nz are:
3903 .vb
3904 proc0 : d_nz = 2, o_nz = 2
3905 proc1 : d_nz = 3, o_nz = 2
3906 proc2 : d_nz = 1, o_nz = 4
3907 .ve
3908 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3909 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3910 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3911 34 values.
3912
3913 When d_nnz, o_nnz parameters are specified, the storage is specified
3914 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3915 In the above case the values for d_nnz,o_nnz are:
3916 .vb
3917 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3918 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3919 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3920 .ve
3921 Here the space allocated is the sum of all the above values, i.e. 34, and
3922 hence the preallocation is perfect.
3923
3924 Level: intermediate
3925
3926 .keywords: matrix, aij, compressed row, sparse, parallel
3927
3928 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3929 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3930 @*/
3931 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3932 {
3933 PetscErrorCode ierr;
3934
3935 PetscFunctionBegin;
3936 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3937 PetscValidType(B,1);
3938 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3939 PetscFunctionReturn(0);
3940 }
3941
3942 #undef __FUNCT__
3943 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3944 /*@
3945 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3946 in standard CSR format.
3947
3948 Collective on MPI_Comm
3949
3950 Input Parameters:
3951 + comm - MPI communicator
3952 . m - number of local rows (Cannot be PETSC_DECIDE)
3953 . n - This value should be the same as the local size used in creating the
3954 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3955 calculated if N is given) For square matrices n is almost always m.
3956 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3957 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3958 . i - row indices; the indices into j for the start of each local row (starts with zero)
3959 . j - column indices for each local row (starts with zero)
3960 - a - matrix values
3961
3962 Output Parameter:
3963 . mat - the matrix
3964
3965 Level: intermediate
3966
3967 Notes:
3968 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3969 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3970 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3971
3972 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
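
   For example, a call on each process might look like the following (a minimal sketch;
   i, j and a hold this process's rows in CSR form as described below, and m and n are the
   local sizes):

.vb
   MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
   /* the arrays i, j and a were copied and may be freed or reused at this point */
.ve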
3973 3974 The format which is used for the sparse matrix input, is equivalent to a 3975 row-major ordering.. i.e for the following matrix, the input data expected is 3976 as shown: 3977 3978 1 0 0 3979 2 0 3 P0 3980 ------- 3981 4 5 6 P1 3982 3983 Process0 [P0]: rows_owned=[0,1] 3984 i = {0,1,3} [size = nrow+1 = 2+1] 3985 j = {0,0,2} [size = nz = 6] 3986 v = {1,2,3} [size = nz = 6] 3987 3988 Process1 [P1]: rows_owned=[2] 3989 i = {0,3} [size = nrow+1 = 1+1] 3990 j = {0,1,2} [size = nz = 6] 3991 v = {4,5,6} [size = nz = 6] 3992 3993 .keywords: matrix, aij, compressed row, sparse, parallel 3994 3995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3996 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3997 @*/ 3998 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3999 { 4000 PetscErrorCode ierr; 4001 4002 PetscFunctionBegin; 4003 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4004 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4005 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4006 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4007 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4008 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4009 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4010 PetscFunctionReturn(0); 4011 } 4012 4013 #undef __FUNCT__ 4014 #define __FUNCT__ "MatCreateAIJ" 4015 /*@C 4016 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4017 (the default parallel PETSc format). For good matrix assembly performance 4018 the user should preallocate the matrix storage by setting the parameters 4019 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4020 performance can be increased by more than a factor of 50. 4021 4022 Collective on MPI_Comm 4023 4024 Input Parameters: 4025 + comm - MPI communicator 4026 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4027 This value should be the same as the local size used in creating the 4028 y vector for the matrix-vector product y = Ax. 4029 . n - This value should be the same as the local size used in creating the 4030 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4031 calculated if N is given) For square matrices n is almost always m. 4032 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4033 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4034 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4035 (same value is used for all local rows) 4036 . d_nnz - array containing the number of nonzeros in the various rows of the 4037 DIAGONAL portion of the local submatrix (possibly different for each row) 4038 or NULL, if d_nz is used to specify the nonzero structure. 4039 The size of this array is equal to the number of local rows, i.e 'm'. 4040 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4041 submatrix (same value is used for all local rows). 4042 - o_nnz - array containing the number of nonzeros in the various rows of the 4043 OFF-DIAGONAL portion of the local submatrix (possibly different for 4044 each row) or NULL, if o_nz is used to specify the nonzero 4045 structure. 
The size of this array is equal to the number 4046 of local rows, i.e 'm'. 4047 4048 Output Parameter: 4049 . A - the matrix 4050 4051 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4052 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4053 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4054 4055 Notes: 4056 If the *_nnz parameter is given then the *_nz parameter is ignored 4057 4058 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4059 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4060 storage requirements for this matrix. 4061 4062 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4063 processor than it must be used on all processors that share the object for 4064 that argument. 4065 4066 The user MUST specify either the local or global matrix dimensions 4067 (possibly both). 4068 4069 The parallel matrix is partitioned across processors such that the 4070 first m0 rows belong to process 0, the next m1 rows belong to 4071 process 1, the next m2 rows belong to process 2 etc.. where 4072 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4073 values corresponding to [m x N] submatrix. 4074 4075 The columns are logically partitioned with the n0 columns belonging 4076 to 0th partition, the next n1 columns belonging to the next 4077 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4078 4079 The DIAGONAL portion of the local submatrix on any given processor 4080 is the submatrix corresponding to the rows and columns m,n 4081 corresponding to the given processor. i.e diagonal matrix on 4082 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4083 etc. The remaining portion of the local submatrix [m x (N-n)] 4084 constitute the OFF-DIAGONAL portion. The example below better 4085 illustrates this concept. 4086 4087 For a square global matrix we define each processor's diagonal portion 4088 to be its local rows and the corresponding columns (a square submatrix); 4089 each processor's off-diagonal portion encompasses the remainder of the 4090 local matrix (a rectangular submatrix). 4091 4092 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4093 4094 When calling this routine with a single process communicator, a matrix of 4095 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4096 type of communicator, use the construction mechanism: 4097 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4098 4099 By default, this format uses inodes (identical nodes) when possible. 4100 We search for consecutive rows with the same nonzero structure, thereby 4101 reusing matrix information to achieve increased efficiency. 4102 4103 Options Database Keys: 4104 + -mat_no_inode - Do not use inodes 4105 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4106 - -mat_aij_oneindex - Internally use indexing starting at 1 4107 rather than 0. Note that when calling MatSetValues(), 4108 the user still MUST index entries starting at 0! 4109 4110 4111 Example usage: 4112 4113 Consider the following 8x8 matrix with 34 non-zero values, that is 4114 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4115 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4116 as follows: 4117 4118 .vb 4119 1 2 0 | 0 3 0 | 0 4 4120 Proc0 0 5 6 | 7 0 0 | 8 0 4121 9 0 10 | 11 0 0 | 12 0 4122 ------------------------------------- 4123 13 0 14 | 15 16 17 | 0 0 4124 Proc1 0 18 0 | 19 20 21 | 0 0 4125 0 0 0 | 22 23 0 | 24 0 4126 ------------------------------------- 4127 Proc2 25 26 27 | 0 0 28 | 29 0 4128 30 0 0 | 31 32 33 | 0 34 4129 .ve 4130 4131 This can be represented as a collection of submatrices as: 4132 4133 .vb 4134 A B C 4135 D E F 4136 G H I 4137 .ve 4138 4139 Where the submatrices A,B,C are owned by proc0, D,E,F are 4140 owned by proc1, G,H,I are owned by proc2. 4141 4142 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4143 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4144 The 'M','N' parameters are 8,8, and have the same values on all procs. 4145 4146 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4147 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4148 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4149 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4150 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4151 matrix, ans [DF] as another SeqAIJ matrix. 4152 4153 When d_nz, o_nz parameters are specified, d_nz storage elements are 4154 allocated for every row of the local diagonal submatrix, and o_nz 4155 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4156 One way to choose d_nz and o_nz is to use the max nonzerors per local 4157 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4158 In this case, the values of d_nz,o_nz are: 4159 .vb 4160 proc0 : dnz = 2, o_nz = 2 4161 proc1 : dnz = 3, o_nz = 2 4162 proc2 : dnz = 1, o_nz = 4 4163 .ve 4164 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4165 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4166 for proc3. i.e we are using 12+15+10=37 storage locations to store 4167 34 values. 4168 4169 When d_nnz, o_nnz parameters are specified, the storage is specified 4170 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4171 In the above case the values for d_nnz,o_nnz are: 4172 .vb 4173 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4174 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4175 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4176 .ve 4177 Here the space allocated is sum of all the above values i.e 34, and 4178 hence pre-allocation is perfect. 
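
   With the counts above, each process could therefore create its share of this matrix as
   sketched below (a minimal outline; m and n are the local sizes 3,3,2 listed earlier,
   d_nnz and o_nnz are the per-row counts just given, and A is the Mat being created):

.vb
   MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve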
4179 4180 Level: intermediate 4181 4182 .keywords: matrix, aij, compressed row, sparse, parallel 4183 4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4185 MPIAIJ, MatCreateMPIAIJWithArrays() 4186 @*/ 4187 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4188 { 4189 PetscErrorCode ierr; 4190 PetscMPIInt size; 4191 4192 PetscFunctionBegin; 4193 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4194 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4196 if (size > 1) { 4197 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4198 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4199 } else { 4200 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4201 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4202 } 4203 PetscFunctionReturn(0); 4204 } 4205 4206 #undef __FUNCT__ 4207 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4208 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4209 { 4210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4211 4212 PetscFunctionBegin; 4213 if (Ad) *Ad = a->A; 4214 if (Ao) *Ao = a->B; 4215 if (colmap) *colmap = a->garray; 4216 PetscFunctionReturn(0); 4217 } 4218 4219 #undef __FUNCT__ 4220 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4221 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4222 { 4223 PetscErrorCode ierr; 4224 PetscInt i; 4225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4226 4227 PetscFunctionBegin; 4228 if (coloring->ctype == IS_COLORING_GLOBAL) { 4229 ISColoringValue *allcolors,*colors; 4230 ISColoring ocoloring; 4231 4232 /* set coloring for diagonal portion */ 4233 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4234 4235 /* set coloring for off-diagonal portion */ 4236 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4237 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4238 for (i=0; i<a->B->cmap->n; i++) { 4239 colors[i] = allcolors[a->garray[i]]; 4240 } 4241 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4242 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4243 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4244 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4245 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4246 ISColoringValue *colors; 4247 PetscInt *larray; 4248 ISColoring ocoloring; 4249 4250 /* set coloring for diagonal portion */ 4251 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4252 for (i=0; i<a->A->cmap->n; i++) { 4253 larray[i] = i + A->cmap->rstart; 4254 } 4255 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4256 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4257 for (i=0; i<a->A->cmap->n; i++) { 4258 colors[i] = coloring->colors[larray[i]]; 4259 } 4260 ierr = PetscFree(larray);CHKERRQ(ierr); 4261 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4262 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4263 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4264 4265 /* set coloring for off-diagonal portion */ 4266 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4267 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4268 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4269 for (i=0; i<a->B->cmap->n; i++) { 4270 colors[i] = coloring->colors[larray[i]]; 4271 } 4272 ierr = PetscFree(larray);CHKERRQ(ierr); 4273 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4274 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4275 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4276 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4277 PetscFunctionReturn(0); 4278 } 4279 4280 #undef __FUNCT__ 4281 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4282 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4283 { 4284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4285 PetscErrorCode ierr; 4286 4287 PetscFunctionBegin; 4288 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4289 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4290 PetscFunctionReturn(0); 4291 } 4292 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4295 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4296 { 4297 PetscErrorCode ierr; 4298 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4299 PetscInt *indx; 4300 4301 PetscFunctionBegin; 4302 /* This routine will ONLY return MPIAIJ type matrix */ 4303 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4304 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4305 if (n == PETSC_DECIDE) { 4306 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4307 } 4308 /* Check sum(n) = N */ 4309 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4310 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4311 4312 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4313 rstart -= m; 4314 4315 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4316 for (i=0; i<m; i++) { 4317 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4318 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4319 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4320 } 4321 4322 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4323 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4324 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4325 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4326 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4327 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4333 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt m,N,i,rstart,nnz,Ii; 4337 PetscInt *indx; 4338 PetscScalar *values; 4339 4340 PetscFunctionBegin; 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4343 for (i=0; i<m; i++) { 4344 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4345 Ii = i + rstart; 4346 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4347 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4348 } 4349 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4350 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4351 PetscFunctionReturn(0); 4352 } 4353 4354 #undef __FUNCT__ 4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4356 /*@ 4357 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4358 matrices from each processor 4359 4360 Collective on MPI_Comm 4361 4362 Input Parameters: 4363 + comm - the communicators the parallel matrix will live on 4364 . inmat - the input sequential matrices 4365 . n - number of local columns (or PETSC_DECIDE) 4366 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4367 4368 Output Parameter: 4369 . outmat - the parallel matrix generated 4370 4371 Level: advanced 4372 4373 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4374 4375 @*/ 4376 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4377 { 4378 PetscErrorCode ierr; 4379 PetscMPIInt size; 4380 4381 PetscFunctionBegin; 4382 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4383 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4384 if (size == 1) { 4385 if (scall == MAT_INITIAL_MATRIX) { 4386 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4387 } else { 4388 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4389 } 4390 } else { 4391 if (scall == MAT_INITIAL_MATRIX) { 4392 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4393 } 4394 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4395 } 4396 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4397 PetscFunctionReturn(0); 4398 } 4399 4400 #undef __FUNCT__ 4401 #define __FUNCT__ "MatFileSplit" 4402 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4403 { 4404 PetscErrorCode ierr; 4405 PetscMPIInt rank; 4406 PetscInt m,N,i,rstart,nnz; 4407 size_t len; 4408 const PetscInt *indx; 4409 PetscViewer out; 4410 char *name; 4411 Mat B; 4412 const PetscScalar *values; 4413 4414 PetscFunctionBegin; 4415 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4416 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4417 /* Should this be the type of the diagonal block of A? 
*/ 4418 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4419 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4420 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4421 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4422 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4423 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4424 for (i=0; i<m; i++) { 4425 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4426 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4427 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4428 } 4429 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4430 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4431 4432 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4433 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4434 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4435 sprintf(name,"%s.%d",outfile,rank); 4436 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4437 ierr = PetscFree(name);CHKERRQ(ierr); 4438 ierr = MatView(B,out);CHKERRQ(ierr); 4439 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4440 ierr = MatDestroy(&B);CHKERRQ(ierr); 4441 PetscFunctionReturn(0); 4442 } 4443 4444 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4445 #undef __FUNCT__ 4446 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4447 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4448 { 4449 PetscErrorCode ierr; 4450 Mat_Merge_SeqsToMPI *merge; 4451 PetscContainer container; 4452 4453 PetscFunctionBegin; 4454 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4455 if (container) { 4456 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4457 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4459 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4469 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4470 ierr = PetscFree(merge);CHKERRQ(ierr); 4471 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4472 } 4473 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #include <../src/mat/utils/freespace.h> 4478 #include <petscbt.h> 4479 4480 #undef __FUNCT__ 4481 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4482 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4483 { 4484 PetscErrorCode ierr; 4485 MPI_Comm comm; 4486 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4487 PetscMPIInt size,rank,taga,*len_s; 4488 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4489 PetscInt proc,m; 4490 PetscInt **buf_ri,**buf_rj; 4491 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4492 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4493 MPI_Request *s_waits,*r_waits; 4494 MPI_Status *status; 4495 MatScalar *aa=a->a; 4496 MatScalar **abuf_r,*ba_i; 4497 Mat_Merge_SeqsToMPI *merge; 4498 PetscContainer container; 4499 4500 PetscFunctionBegin; 4501 ierr = 
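/*
   The numeric phase below relies on the Mat_Merge_SeqsToMPI context that the symbolic
   phase composed on mpimat as a PetscContainer under the key "MatMergeSeqsToMPI"
   (queried again a few lines further down).  The generic retrieval pattern, with obj
   and ctx as placeholder names, is:

     PetscContainer container;
     void           *ctx;
     ierr = PetscObjectQuery((PetscObject)obj,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
     if (!container) SETERRQ(PetscObjectComm((PetscObject)obj),PETSC_ERR_ARG_WRONGSTATE,"Container not found");
     ierr = PetscContainerGetPointer(container,&ctx);CHKERRQ(ierr);

   MatDestroy_MPIAIJ_SeqsToMPI() above uses the same query before freeing the context.
*/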
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4502 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4503 4504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4505 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4506 4507 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4508 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4509 4510 bi = merge->bi; 4511 bj = merge->bj; 4512 buf_ri = merge->buf_ri; 4513 buf_rj = merge->buf_rj; 4514 4515 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4516 owners = merge->rowmap->range; 4517 len_s = merge->len_s; 4518 4519 /* send and recv matrix values */ 4520 /*-----------------------------*/ 4521 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4522 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4523 4524 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4525 for (proc=0,k=0; proc<size; proc++) { 4526 if (!len_s[proc]) continue; 4527 i = owners[proc]; 4528 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4529 k++; 4530 } 4531 4532 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4533 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4534 ierr = PetscFree(status);CHKERRQ(ierr); 4535 4536 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4537 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4538 4539 /* insert mat values of mpimat */ 4540 /*----------------------------*/ 4541 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4542 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4543 4544 for (k=0; k<merge->nrecv; k++) { 4545 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4546 nrows = *(buf_ri_k[k]); 4547 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4548 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4549 } 4550 4551 /* set values of ba */ 4552 m = merge->rowmap->n; 4553 for (i=0; i<m; i++) { 4554 arow = owners[rank] + i; 4555 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4556 bnzi = bi[i+1] - bi[i]; 4557 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4558 4559 /* add local non-zero vals of this proc's seqmat into ba */ 4560 anzi = ai[arow+1] - ai[arow]; 4561 aj = a->j + ai[arow]; 4562 aa = a->a + ai[arow]; 4563 nextaj = 0; 4564 for (j=0; nextaj<anzi; j++) { 4565 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4566 ba_i[j] += aa[nextaj++]; 4567 } 4568 } 4569 4570 /* add received vals into ba */ 4571 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4572 /* i-th row */ 4573 if (i == *nextrow[k]) { 4574 anzi = *(nextai[k]+1) - *nextai[k]; 4575 aj = buf_rj[k] + *(nextai[k]); 4576 aa = abuf_r[k] + *(nextai[k]); 4577 nextaj = 0; 4578 for (j=0; nextaj<anzi; j++) { 4579 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4580 ba_i[j] += aa[nextaj++]; 4581 } 4582 } 4583 nextrow[k]++; nextai[k]++; 4584 } 4585 } 4586 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4587 } 4588 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 4591 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4592 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4593 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4594 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4595 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4600 4601 #undef __FUNCT__ 4602 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4603 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4604 { 4605 PetscErrorCode ierr; 4606 Mat B_mpi; 4607 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4608 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4609 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4610 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4611 PetscInt len,proc,*dnz,*onz,bs,cbs; 4612 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4613 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4614 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4615 MPI_Status *status; 4616 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4617 PetscBT lnkbt; 4618 Mat_Merge_SeqsToMPI *merge; 4619 PetscContainer container; 4620 4621 PetscFunctionBegin; 4622 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4623 4624 /* make sure it is a PETSc comm */ 4625 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4627 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4628 4629 ierr = PetscNew(&merge);CHKERRQ(ierr); 4630 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4631 4632 /* determine row ownership */ 4633 /*---------------------------------------------------------*/ 4634 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4635 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4636 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4637 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4638 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4639 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4640 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4641 4642 m = merge->rowmap->n; 4643 owners = merge->rowmap->range; 4644 4645 /* determine the number of messages to send, their lengths */ 4646 /*---------------------------------------------------------*/ 4647 len_s = merge->len_s; 4648 4649 len = 0; /* length of buf_si[] */ 4650 merge->nsend = 0; 4651 for (proc=0; proc<size; proc++) { 4652 len_si[proc] = 0; 4653 if (proc == rank) { 4654 len_s[proc] = 0; 4655 } else { 4656 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4657 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4658 } 4659 if (len_s[proc]) { 4660 merge->nsend++; 4661 nrows = 0; 4662 for (i=owners[proc]; i<owners[proc+1]; i++) { 4663 if (ai[i+1] > ai[i]) nrows++; 4664 } 4665 len_si[proc] = 2*(nrows+1); 4666 len += len_si[proc]; 4667 } 4668 } 4669 4670 /* determine the number and length of messages to receive for ij-structure */ 4671 /*-------------------------------------------------------------------------*/ 4672 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4673 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4674 4675 /* post the Irecv of j-structure */ 4676 /*-------------------------------*/ 4677 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4678 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4679 4680 /* post the Isend of j-structure */ 4681 /*--------------------------------*/ 4682 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4683 4684 for (proc=0, k=0; proc<size; proc++) { 4685 if (!len_s[proc]) continue; 4686 i = owners[proc]; 4687 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4688 k++; 4689 } 4690 4691 /* receives and sends of j-structure are complete */ 4692 /*------------------------------------------------*/ 4693 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4694 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4695 4696 /* send and recv i-structure */ 4697 /*---------------------------*/ 4698 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4699 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4700 4701 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4702 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4703 for (proc=0,k=0; proc<size; proc++) { 4704 if (!len_s[proc]) continue; 4705 /* form outgoing message for i-structure: 4706 buf_si[0]: nrows to be sent 4707 [1:nrows]: row index (global) 4708 [nrows+1:2*nrows+1]: i-structure index 4709 */ 4710 /*-------------------------------------------*/ 4711 nrows = len_si[proc]/2 - 1; 4712 buf_si_i = buf_si + nrows+1; 4713 buf_si[0] = nrows; 4714 buf_si_i[0] = 0; 4715 nrows = 0; 4716 for (i=owners[proc]; i<owners[proc+1]; i++) { 4717 anzi = ai[i+1] - ai[i]; 4718 if (anzi) { 4719 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4720 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4721 nrows++; 4722 } 4723 } 4724 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4725 k++; 4726 buf_si += len_si[proc]; 4727 } 4728 4729 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4730 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4731 4732 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4733 for (i=0; i<merge->nrecv; i++) { 4734 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4735 } 4736 4737 ierr = PetscFree(len_si);CHKERRQ(ierr); 4738 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4739 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4740 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4741 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4742 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4743 ierr = PetscFree(status);CHKERRQ(ierr); 4744 4745 /* compute a local seq matrix in each processor */ 4746 /*----------------------------------------------*/ 4747 /* allocate bi array and free space for accumulating nonzero column info */ 4748 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4749 bi[0] = 0; 4750 4751 /* create and initialize a linked list */ 4752 nlnk = N+1; 4753 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4754 4755 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4756 len = ai[owners[rank+1]] - ai[owners[rank]]; 4757 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4758 4759 current_space = free_space; 4760 4761 /* determine symbolic info for each local row */ 4762 ierr = 
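/*
   Layout of each received i-structure buf_ri[k]: entry 0 holds the number of rows in
   the message, entries 1..nrows hold row indices local to this process's row range,
   and the following nrows+1 entries are running offsets into the matching j-structure
   buf_rj[k].  The cursors allocated below (buf_ri_k, nextrow, nextai) walk these two
   parts row by row while the local and received rows are merged.
*/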
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4763 4764 for (k=0; k<merge->nrecv; k++) { 4765 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4766 nrows = *buf_ri_k[k]; 4767 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4768 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4769 } 4770 4771 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4772 len = 0; 4773 for (i=0; i<m; i++) { 4774 bnzi = 0; 4775 /* add local non-zero cols of this proc's seqmat into lnk */ 4776 arow = owners[rank] + i; 4777 anzi = ai[arow+1] - ai[arow]; 4778 aj = a->j + ai[arow]; 4779 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4780 bnzi += nlnk; 4781 /* add received col data into lnk */ 4782 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4783 if (i == *nextrow[k]) { /* i-th row */ 4784 anzi = *(nextai[k]+1) - *nextai[k]; 4785 aj = buf_rj[k] + *nextai[k]; 4786 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4787 bnzi += nlnk; 4788 nextrow[k]++; nextai[k]++; 4789 } 4790 } 4791 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4792 4793 /* if free space is not available, make more free space */ 4794 if (current_space->local_remaining<bnzi) { 4795 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4796 nspacedouble++; 4797 } 4798 /* copy data into free space, then initialize lnk */ 4799 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4800 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4801 4802 current_space->array += bnzi; 4803 current_space->local_used += bnzi; 4804 current_space->local_remaining -= bnzi; 4805 4806 bi[i+1] = bi[i] + bnzi; 4807 } 4808 4809 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4810 4811 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4812 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4813 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4814 4815 /* create symbolic parallel matrix B_mpi */ 4816 /*---------------------------------------*/ 4817 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4818 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4819 if (n==PETSC_DECIDE) { 4820 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4821 } else { 4822 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4823 } 4824 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4825 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4826 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4827 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4828 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4829 4830 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4831 B_mpi->assembled = PETSC_FALSE; 4832 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4833 merge->bi = bi; 4834 merge->bj = bj; 4835 merge->buf_ri = buf_ri; 4836 merge->buf_rj = buf_rj; 4837 merge->coi = NULL; 4838 merge->coj = NULL; 4839 merge->owners_co = NULL; 4840 4841 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4842 4843 /* attach the supporting struct to B_mpi for reuse */ 4844 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4845 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4846 ierr = 
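/*
   The merge context is attached to B_mpi through a PetscContainer so the numeric phase
   and MatDestroy_MPIAIJ_SeqsToMPI() can retrieve it later.  The generic attach pattern,
   with ptr, obj and "SomeKey" as placeholders, is:

     PetscContainer container;
     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,ptr);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)obj,"SomeKey",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

   The surrounding statements do exactly this for merge and B_mpi with the key
   "MatMergeSeqsToMPI".
*/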
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4847 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4848 *mpimat = B_mpi; 4849 4850 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4851 PetscFunctionReturn(0); 4852 } 4853 4854 #undef __FUNCT__ 4855 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4856 /*@C 4857 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4858 matrices from each processor 4859 4860 Collective on MPI_Comm 4861 4862 Input Parameters: 4863 + comm - the communicators the parallel matrix will live on 4864 . seqmat - the input sequential matrices 4865 . m - number of local rows (or PETSC_DECIDE) 4866 . n - number of local columns (or PETSC_DECIDE) 4867 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4868 4869 Output Parameter: 4870 . mpimat - the parallel matrix generated 4871 4872 Level: advanced 4873 4874 Notes: 4875 The dimensions of the sequential matrix in each processor MUST be the same. 4876 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4877 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4878 @*/ 4879 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4880 { 4881 PetscErrorCode ierr; 4882 PetscMPIInt size; 4883 4884 PetscFunctionBegin; 4885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4886 if (size == 1) { 4887 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4888 if (scall == MAT_INITIAL_MATRIX) { 4889 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4890 } else { 4891 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4892 } 4893 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4894 PetscFunctionReturn(0); 4895 } 4896 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4897 if (scall == MAT_INITIAL_MATRIX) { 4898 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4899 } 4900 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4901 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4902 PetscFunctionReturn(0); 4903 } 4904 4905 #undef __FUNCT__ 4906 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4907 /*@ 4908 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4909 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4910 with MatGetSize() 4911 4912 Not Collective 4913 4914 Input Parameters: 4915 + A - the matrix 4916 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4917 4918 Output Parameter: 4919 . 
A_loc - the local sequential matrix generated 4920 4921 Level: developer 4922 4923 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4924 4925 @*/ 4926 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4927 { 4928 PetscErrorCode ierr; 4929 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4930 Mat_SeqAIJ *mat,*a,*b; 4931 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4932 MatScalar *aa,*ba,*cam; 4933 PetscScalar *ca; 4934 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4935 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4936 PetscBool match; 4937 MPI_Comm comm; 4938 PetscMPIInt size; 4939 4940 PetscFunctionBegin; 4941 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4942 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4943 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4944 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4945 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4946 4947 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4948 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4949 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4950 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4951 aa = a->a; ba = b->a; 4952 if (scall == MAT_INITIAL_MATRIX) { 4953 if (size == 1) { 4954 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4955 PetscFunctionReturn(0); 4956 } 4957 4958 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4959 ci[0] = 0; 4960 for (i=0; i<am; i++) { 4961 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4962 } 4963 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4964 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4965 k = 0; 4966 for (i=0; i<am; i++) { 4967 ncols_o = bi[i+1] - bi[i]; 4968 ncols_d = ai[i+1] - ai[i]; 4969 /* off-diagonal portion of A */ 4970 for (jo=0; jo<ncols_o; jo++) { 4971 col = cmap[*bj]; 4972 if (col >= cstart) break; 4973 cj[k] = col; bj++; 4974 ca[k++] = *ba++; 4975 } 4976 /* diagonal portion of A */ 4977 for (j=0; j<ncols_d; j++) { 4978 cj[k] = cstart + *aj++; 4979 ca[k++] = *aa++; 4980 } 4981 /* off-diagonal portion of A */ 4982 for (j=jo; j<ncols_o; j++) { 4983 cj[k] = cmap[*bj++]; 4984 ca[k++] = *ba++; 4985 } 4986 } 4987 /* put together the new matrix */ 4988 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4989 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4990 /* Since these are PETSc arrays, change flags to free them as necessary. 
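   (The ci, cj and ca arrays were obtained from PetscMalloc1() above, so flipping
   free_a and free_ij below hands their ownership to the new SeqAIJ matrix; a later
   MatDestroy() on *A_loc then releases them.)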
*/ 4991 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4992 mat->free_a = PETSC_TRUE; 4993 mat->free_ij = PETSC_TRUE; 4994 mat->nonew = 0; 4995 } else if (scall == MAT_REUSE_MATRIX) { 4996 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4997 ci = mat->i; cj = mat->j; cam = mat->a; 4998 for (i=0; i<am; i++) { 4999 /* off-diagonal portion of A */ 5000 ncols_o = bi[i+1] - bi[i]; 5001 for (jo=0; jo<ncols_o; jo++) { 5002 col = cmap[*bj]; 5003 if (col >= cstart) break; 5004 *cam++ = *ba++; bj++; 5005 } 5006 /* diagonal portion of A */ 5007 ncols_d = ai[i+1] - ai[i]; 5008 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5009 /* off-diagonal portion of A */ 5010 for (j=jo; j<ncols_o; j++) { 5011 *cam++ = *ba++; bj++; 5012 } 5013 } 5014 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5015 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5016 PetscFunctionReturn(0); 5017 } 5018 5019 #undef __FUNCT__ 5020 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5021 /*@C 5022 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5023 5024 Not Collective 5025 5026 Input Parameters: 5027 + A - the matrix 5028 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5029 - row, col - index sets of rows and columns to extract (or NULL) 5030 5031 Output Parameter: 5032 . A_loc - the local sequential matrix generated 5033 5034 Level: developer 5035 5036 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5037 5038 @*/ 5039 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5040 { 5041 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5042 PetscErrorCode ierr; 5043 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5044 IS isrowa,iscola; 5045 Mat *aloc; 5046 PetscBool match; 5047 5048 PetscFunctionBegin; 5049 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5050 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5051 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5052 if (!row) { 5053 start = A->rmap->rstart; end = A->rmap->rend; 5054 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5055 } else { 5056 isrowa = *row; 5057 } 5058 if (!col) { 5059 start = A->cmap->rstart; 5060 cmap = a->garray; 5061 nzA = a->A->cmap->n; 5062 nzB = a->B->cmap->n; 5063 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5064 ncols = 0; 5065 for (i=0; i<nzB; i++) { 5066 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5067 else break; 5068 } 5069 imark = i; 5070 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5071 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5072 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5073 } else { 5074 iscola = *col; 5075 } 5076 if (scall != MAT_INITIAL_MATRIX) { 5077 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5078 aloc[0] = *A_loc; 5079 } 5080 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5081 *A_loc = aloc[0]; 5082 ierr = PetscFree(aloc);CHKERRQ(ierr); 5083 if (!row) { 5084 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5085 } 5086 if (!col) { 5087 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5088 } 5089 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 #undef __FUNCT__ 5094 #define __FUNCT__ "MatGetBrowsOfAcols" 5095 /*@C 5096 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5097 5098 Collective on Mat 5099 5100 Input Parameters: 5101 + A,B - the matrices in mpiaij format 5102 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5103 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5104 5105 Output Parameter: 5106 + rowb, colb - index sets of rows and columns of B to extract 5107 - B_seq - the sequential matrix generated 5108 5109 Level: developer 5110 5111 @*/ 5112 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5113 { 5114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5115 PetscErrorCode ierr; 5116 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5117 IS isrowb,iscolb; 5118 Mat *bseq=NULL; 5119 5120 PetscFunctionBegin; 5121 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5122 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5123 } 5124 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5125 5126 if (scall == MAT_INITIAL_MATRIX) { 5127 start = A->cmap->rstart; 5128 cmap = a->garray; 5129 nzA = a->A->cmap->n; 5130 nzB = a->B->cmap->n; 5131 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5132 ncols = 0; 5133 for (i=0; i<nzB; i++) { /* row < local row index */ 5134 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5135 else break; 5136 } 5137 imark = i; 5138 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5139 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5140 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5141 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5142 } else { 5143 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5144 isrowb = *rowb; iscolb = *colb; 5145 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5146 bseq[0] = *B_seq; 5147 } 5148 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5149 *B_seq = bseq[0]; 5150 ierr = PetscFree(bseq);CHKERRQ(ierr); 5151 if (!rowb) { 5152 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5153 } else { 5154 *rowb = isrowb; 5155 } 5156 if (!colb) { 5157 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5158 } else { 5159 *colb = iscolb; 5160 } 5161 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5162 PetscFunctionReturn(0); 5163 } 5164 5165 #undef __FUNCT__ 5166 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5167 /* 5168 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5169 of the OFF-DIAGONAL portion of local A 5170 5171 Collective on Mat 5172 5173 Input Parameters: 5174 + A,B - the matrices in mpiaij format 5175 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5176 5177 Output Parameter: 5178 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5179 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5180 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5181 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5182 5183 Level: developer 5184 5185 */ 5186 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5187 { 5188 VecScatter_MPI_General *gen_to,*gen_from; 5189 PetscErrorCode ierr; 5190 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5191 Mat_SeqAIJ *b_oth; 5192 VecScatter ctx =a->Mvctx; 5193 MPI_Comm comm; 5194 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5195 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5196 PetscScalar *rvalues,*svalues; 5197 MatScalar *b_otha,*bufa,*bufA; 5198 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5199 MPI_Request *rwaits = NULL,*swaits = NULL; 5200 MPI_Status *sstatus,rstatus; 5201 PetscMPIInt jj,size; 5202 PetscInt *cols,sbs,rbs; 5203 PetscScalar *vals; 5204 5205 PetscFunctionBegin; 5206 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5207 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5208 if (size == 1) PetscFunctionReturn(0); 5209 5210 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5211 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5212 } 5213 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5214 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5215 5216 gen_to = (VecScatter_MPI_General*)ctx->todata; 5217 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5218 rvalues = gen_from->values; /* holds the length of receiving row */ 5219 svalues = gen_to->values; /* holds the length of sending row */ 5220 nrecvs = gen_from->n; 5221 nsends = gen_to->n; 5222 5223 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5224 srow = gen_to->indices; /* local row index to be sent */ 5225 sstarts = gen_to->starts; 5226 sprocs = gen_to->procs; 5227 sstatus = gen_to->sstatus; 5228 sbs = gen_to->bs; 5229 rstarts = gen_from->starts; 5230 rprocs = gen_from->procs; 5231 rbs = gen_from->bs; 5232 5233 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5234 if (scall == MAT_INITIAL_MATRIX) { 5235 /* i-array */ 5236 /*---------*/ 5237 /* post receives */ 5238 for (i=0; i<nrecvs; i++) { 5239 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5240 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5241 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5242 } 5243 5244 /* pack the outgoing message */ 5245 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5246 5247 sstartsj[0] = 0; 5248 rstartsj[0] = 0; 5249 len = 0; /* total length of j or a array to be sent */ 5250 k = 0; 5251 for (i=0; i<nsends; i++) { 5252 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5253 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5254 for (j=0; j<nrows; j++) { 5255 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5256 for (l=0; l<sbs; l++) { 5257 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5258 5259 rowlen[j*sbs+l] = ncols; 5260 5261 len += ncols; 5262 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5263 } 5264 k++; 5265 } 5266 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5267 5268 
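/* Note: rowlen points into the VecScatter send buffer (gen_to->values), reused above as
   integer scratch space for the row lengths just sent; presumably this is safe because a
   PetscInt is assumed not to be larger than a PetscScalar. */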
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5269 } 5270 /* recvs and sends of i-array are completed */ 5271 i = nrecvs; 5272 while (i--) { 5273 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5274 } 5275 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5276 5277 /* allocate buffers for sending j and a arrays */ 5278 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5279 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5280 5281 /* create i-array of B_oth */ 5282 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5283 5284 b_othi[0] = 0; 5285 len = 0; /* total length of j or a array to be received */ 5286 k = 0; 5287 for (i=0; i<nrecvs; i++) { 5288 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5289 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5290 for (j=0; j<nrows; j++) { 5291 b_othi[k+1] = b_othi[k] + rowlen[j]; 5292 len += rowlen[j]; k++; 5293 } 5294 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5295 } 5296 5297 /* allocate space for j and a arrrays of B_oth */ 5298 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5299 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5300 5301 /* j-array */ 5302 /*---------*/ 5303 /* post receives of j-array */ 5304 for (i=0; i<nrecvs; i++) { 5305 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5306 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5307 } 5308 5309 /* pack the outgoing message j-array */ 5310 k = 0; 5311 for (i=0; i<nsends; i++) { 5312 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5313 bufJ = bufj+sstartsj[i]; 5314 for (j=0; j<nrows; j++) { 5315 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5316 for (ll=0; ll<sbs; ll++) { 5317 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5318 for (l=0; l<ncols; l++) { 5319 *bufJ++ = cols[l]; 5320 } 5321 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5322 } 5323 } 5324 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5325 } 5326 5327 /* recvs and sends of j-array are completed */ 5328 i = nrecvs; 5329 while (i--) { 5330 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5331 } 5332 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5333 } else if (scall == MAT_REUSE_MATRIX) { 5334 sstartsj = *startsj_s; 5335 rstartsj = *startsj_r; 5336 bufa = *bufa_ptr; 5337 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5338 b_otha = b_oth->a; 5339 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5340 5341 /* a-array */ 5342 /*---------*/ 5343 /* post receives of a-array */ 5344 for (i=0; i<nrecvs; i++) { 5345 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5346 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5347 } 5348 5349 /* pack the outgoing message a-array */ 5350 k = 0; 5351 for (i=0; i<nsends; i++) { 5352 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5353 bufA = bufa+sstartsj[i]; 5354 for (j=0; j<nrows; j++) { 5355 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5356 for (ll=0; ll<sbs; ll++) { 5357 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5358 for (l=0; l<ncols; l++) { 5359 *bufA++ = vals[l]; 5360 } 5361 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5362 } 5363 } 5364 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5365 } 5366 /* recvs and sends of a-array are completed */ 5367 i = nrecvs; 5368 while (i--) { 5369 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5370 } 5371 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5372 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5373 5374 if (scall == MAT_INITIAL_MATRIX) { 5375 /* put together the new matrix */ 5376 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5377 5378 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5379 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5380 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5381 b_oth->free_a = PETSC_TRUE; 5382 b_oth->free_ij = PETSC_TRUE; 5383 b_oth->nonew = 0; 5384 5385 ierr = PetscFree(bufj);CHKERRQ(ierr); 5386 if (!startsj_s || !bufa_ptr) { 5387 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5388 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5389 } else { 5390 *startsj_s = sstartsj; 5391 *startsj_r = rstartsj; 5392 *bufa_ptr = bufa; 5393 } 5394 } 5395 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5396 PetscFunctionReturn(0); 5397 } 5398 5399 #undef __FUNCT__ 5400 #define __FUNCT__ "MatGetCommunicationStructs" 5401 /*@C 5402 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5403 5404 Not Collective 5405 5406 Input Parameters: 5407 . A - The matrix in mpiaij format 5408 5409 Output Parameter: 5410 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5411 . 
colmap - A map from global column index to local index into lvec 5412 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5413 5414 Level: developer 5415 5416 @*/ 5417 #if defined(PETSC_USE_CTABLE) 5418 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5419 #else 5420 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5421 #endif 5422 { 5423 Mat_MPIAIJ *a; 5424 5425 PetscFunctionBegin; 5426 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5427 PetscValidPointer(lvec, 2); 5428 PetscValidPointer(colmap, 3); 5429 PetscValidPointer(multScatter, 4); 5430 a = (Mat_MPIAIJ*) A->data; 5431 if (lvec) *lvec = a->lvec; 5432 if (colmap) *colmap = a->colmap; 5433 if (multScatter) *multScatter = a->Mvctx; 5434 PetscFunctionReturn(0); 5435 } 5436 5437 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5438 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5439 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5440 5441 #undef __FUNCT__ 5442 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5443 /* 5444 Computes (B'*A')' since computing B*A directly is untenable 5445 5446 n p p 5447 ( ) ( ) ( ) 5448 m ( A ) * n ( B ) = m ( C ) 5449 ( ) ( ) ( ) 5450 5451 */ 5452 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5453 { 5454 PetscErrorCode ierr; 5455 Mat At,Bt,Ct; 5456 5457 PetscFunctionBegin; 5458 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5459 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5460 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5461 ierr = MatDestroy(&At);CHKERRQ(ierr); 5462 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5463 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5464 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5465 PetscFunctionReturn(0); 5466 } 5467 5468 #undef __FUNCT__ 5469 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5470 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5471 { 5472 PetscErrorCode ierr; 5473 PetscInt m=A->rmap->n,n=B->cmap->n; 5474 Mat Cmat; 5475 5476 PetscFunctionBegin; 5477 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5478 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5479 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5480 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5481 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5482 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5483 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5484 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5485 5486 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5487 5488 *C = Cmat; 5489 PetscFunctionReturn(0); 5490 } 5491 5492 /* ----------------------------------------------------------------*/ 5493 #undef __FUNCT__ 5494 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5495 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5496 { 5497 PetscErrorCode ierr; 5498 5499 PetscFunctionBegin; 5500 if (scall == MAT_INITIAL_MATRIX) { 5501 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5502 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5503 ierr = 
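/*
   A usage sketch for the dense*AIJ product implemented above, where A is MPIDENSE and B
   is MPIAIJ with compatible sizes (both matrices here are hypothetical):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Internally C is formed as (B'*A')' by the numeric routine above, since multiplying a
   row-distributed dense matrix into an AIJ matrix directly is untenable.
*/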
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5504 } 5505 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5506 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5507 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5508 PetscFunctionReturn(0); 5509 } 5510 5511 #if defined(PETSC_HAVE_MUMPS) 5512 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5513 #endif 5514 #if defined(PETSC_HAVE_PASTIX) 5515 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5516 #endif 5517 #if defined(PETSC_HAVE_SUPERLU_DIST) 5518 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_CLIQUE) 5521 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5522 #endif 5523 5524 /*MC 5525 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5526 5527 Options Database Keys: 5528 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5529 5530 Level: beginner 5531 5532 .seealso: MatCreateAIJ() 5533 M*/ 5534 5535 #undef __FUNCT__ 5536 #define __FUNCT__ "MatCreate_MPIAIJ" 5537 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5538 { 5539 Mat_MPIAIJ *b; 5540 PetscErrorCode ierr; 5541 PetscMPIInt size; 5542 5543 PetscFunctionBegin; 5544 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5545 5546 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5547 B->data = (void*)b; 5548 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5549 B->assembled = PETSC_FALSE; 5550 B->insertmode = NOT_SET_VALUES; 5551 b->size = size; 5552 5553 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5554 5555 /* build cache for off array entries formed */ 5556 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5557 5558 b->donotstash = PETSC_FALSE; 5559 b->colmap = 0; 5560 b->garray = 0; 5561 b->roworiented = PETSC_TRUE; 5562 5563 /* stuff used for matrix vector multiply */ 5564 b->lvec = NULL; 5565 b->Mvctx = NULL; 5566 5567 /* stuff for MatGetRow() */ 5568 b->rowindices = 0; 5569 b->rowvalues = 0; 5570 b->getrowactive = PETSC_FALSE; 5571 5572 /* flexible pointer used in CUSP/CUSPARSE classes */ 5573 b->spptr = NULL; 5574 5575 #if defined(PETSC_HAVE_MUMPS) 5576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5577 #endif 5578 #if defined(PETSC_HAVE_PASTIX) 5579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5580 #endif 5581 #if defined(PETSC_HAVE_SUPERLU_DIST) 5582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5583 #endif 5584 #if defined(PETSC_HAVE_CLIQUE) 5585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5586 #endif 5587 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5591 ierr = 
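/*
   MatCreate_MPIAIJ() publishes its type-specific methods by composing function pointers
   on the object under string keys ending in "_C"; generic interface routines later look
   them up and call them without any compile-time dependency.  A retrieval sketch (the
   variable f is a placeholder):

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);}

   which is essentially what MatMPIAIJSetPreallocation() does for the function composed
   just below.
*/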
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5601 PetscFunctionReturn(0); 5602 } 5603 5604 #undef __FUNCT__ 5605 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5606 /*@C 5607 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5608 and "off-diagonal" part of the matrix in CSR format. 5609 5610 Collective on MPI_Comm 5611 5612 Input Parameters: 5613 + comm - MPI communicator 5614 . m - number of local rows (Cannot be PETSC_DECIDE) 5615 . n - This value should be the same as the local size used in creating the 5616 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5617 calculated if N is given) For square matrices n is almost always m. 5618 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5619 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5620 . i - row indices for "diagonal" portion of matrix 5621 . j - column indices 5622 . a - matrix values 5623 . oi - row indices for "off-diagonal" portion of matrix 5624 . oj - column indices 5625 - oa - matrix values 5626 5627 Output Parameter: 5628 . mat - the matrix 5629 5630 Level: advanced 5631 5632 Notes: 5633 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5634 must free the arrays once the matrix has been destroyed and not before. 5635 5636 The i and j indices are 0 based 5637 5638 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5639 5640 This sets local rows and cannot be used to set off-processor values. 5641 5642 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5643 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5644 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5645 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5646 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5647 communication if it is known that only local entries will be set. 
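
   Example usage, as a sketch only: assuming the local CSR arrays i,j,a (diagonal block)
   and oi,oj,oa (off-diagonal block) have already been filled as described above and m,n
   are the local sizes, the call is simply

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   remembering that the arrays must remain valid until A has been destroyed.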
5648 5649 .keywords: matrix, aij, compressed row, sparse, parallel 5650 5651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5652 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5653 C@*/ 5654 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5655 { 5656 PetscErrorCode ierr; 5657 Mat_MPIAIJ *maij; 5658 5659 PetscFunctionBegin; 5660 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5661 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5662 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5663 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5664 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5665 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5666 maij = (Mat_MPIAIJ*) (*mat)->data; 5667 5668 (*mat)->preallocated = PETSC_TRUE; 5669 5670 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5671 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5672 5673 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5674 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5675 5676 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5677 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5678 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5679 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5680 5681 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5684 PetscFunctionReturn(0); 5685 } 5686 5687 /* 5688 Special version for direct calls from Fortran 5689 */ 5690 #include <petsc-private/fortranimpl.h> 5691 5692 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5693 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5694 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5695 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5696 #endif 5697 5698 /* Change these macros so can be used in void function */ 5699 #undef CHKERRQ 5700 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5701 #undef SETERRQ2 5702 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5703 #undef SETERRQ3 5704 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5705 #undef SETERRQ 5706 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5707 5708 #undef __FUNCT__ 5709 #define __FUNCT__ "matsetvaluesmpiaij_" 5710 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5711 { 5712 Mat mat = *mmat; 5713 PetscInt m = *mm, n = *mn; 5714 InsertMode addv = *maddv; 5715 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5716 PetscScalar value; 5717 PetscErrorCode ierr; 5718 5719 MatCheckPreallocated(mat,1); 5720 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5721 5722 #if defined(PETSC_USE_DEBUG) 5723 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5724 #endif 5725 { 5726 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5727 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5728 PetscBool roworiented = aij->roworiented; 5729 5730 /* Some Variables required in the macro */ 5731 Mat A = aij->A; 5732 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5733 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5734 MatScalar *aa = a->a; 5735 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5736 Mat B = aij->B; 5737 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5738 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5739 MatScalar *ba = b->a; 5740 5741 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5742 PetscInt nonew = a->nonew; 5743 MatScalar *ap1,*ap2; 5744 5745 PetscFunctionBegin; 5746 for (i=0; i<m; i++) { 5747 if (im[i] < 0) continue; 5748 #if defined(PETSC_USE_DEBUG) 5749 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5750 #endif 5751 if (im[i] >= rstart && im[i] < rend) { 5752 row = im[i] - rstart; 5753 lastcol1 = -1; 5754 rp1 = aj + ai[row]; 5755 ap1 = aa + ai[row]; 5756 rmax1 = aimax[row]; 5757 nrow1 = ailen[row]; 5758 low1 = 0; 5759 high1 = nrow1; 5760 lastcol2 = -1; 5761 rp2 = bj + bi[row]; 5762 ap2 = ba + bi[row]; 5763 rmax2 = bimax[row]; 5764 nrow2 = bilen[row]; 5765 low2 = 0; 5766 high2 = nrow2; 5767 5768 for (j=0; j<n; j++) { 5769 if (roworiented) value = v[i*n+j]; 5770 else value = v[i+j*m]; 5771 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5772 if (in[j] >= cstart && in[j] < cend) { 5773 col = in[j] - cstart; 5774 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5775 } else if (in[j] < 0) continue; 5776 #if defined(PETSC_USE_DEBUG) 5777 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5778 #endif 5779 else { 5780 if (mat->was_assembled) { 5781 if (!aij->colmap) { 5782 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5783 } 5784 #if defined(PETSC_USE_CTABLE) 5785 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5786 col--; 5787 #else 5788 col = aij->colmap[in[j]] - 1; 5789 #endif 5790 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5791 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5792 col = in[j]; 5793 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5794 B = aij->B; 5795 b = (Mat_SeqAIJ*)B->data; 5796 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5797 rp2 = bj + bi[row]; 5798 ap2 = ba + bi[row]; 5799 rmax2 = bimax[row]; 5800 nrow2 = bilen[row]; 5801 low2 = 0; 5802 high2 = nrow2; 5803 bm = aij->B->rmap->n; 5804 ba = b->a; 5805 } 5806 } else col = in[j]; 5807 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5808 } 5809 } 5810 } else if (!aij->donotstash) { 5811 if (roworiented) { 5812 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5813 } else { 5814 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5815 } 5816 } 5817 } 5818 } 5819 PetscFunctionReturnVoid(); 5820 } 5821 5822