1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j<nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for (j=0; j<na; j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt
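/*
   Each process accumulates the contributions of its local diagonal (A) and
   off-diagonal (B) blocks into a work array of global length n; a single
   MPI_Allreduce() then combines the partial results (max for NORM_INFINITY,
   sum otherwise). A minimal caller-side sketch, assuming an assembled MPIAIJ
   matrix A with global column count N (variable names here are illustrative):

       PetscReal *norms;
       ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/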
i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
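The protocol, as implemented below: rank 0 (which owns the global SeqAIJ matrix) sends each other rank its row lengths, then its column indices, then its numerical values; every rank counts its diagonal and off-diagonal entries per row, preallocates, and inserts its piece with MatSetValues(). With MAT_REUSE_MATRIX only the numerical values are shipped and copied directly into the existing diagonal (A) and off-diagonal (B) blocks.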
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine the number of diagonal and off-diagonal entries in each row */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine the number of diagonal and off-diagonal entries in each row */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
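For example, if the off-diagonal block owns global columns garray = {3,8,42}, then colmap maps 3 -> 1, 8 -> 2, 42 -> 3; the values are stored shifted up by one so that a lookup result of 0 (hence col = -1 after shifting back) can signal "column not present".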
When PETSC_USE_CTABLE is defined this map is scalable, at 330 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor 331 holds an integer array of length N), though access is then fast. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++) { 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col; \ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col; \ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
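/*
   MatAssemblyBegin_MPIAIJ() starts shipping any stashed off-process entries to
   their owning ranks; MatAssemblyEnd_MPIAIJ() below receives and inserts them.
   The usual caller pattern is a sketch like the following (entries set with
   MatSetValues() may target any rank's rows):

       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/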
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselves, in order that we may reassemble.
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length */ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal entry, which 843 would allocate new memory with many mallocs and slow things down.
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
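/*
   The MatMult*_MPIAIJ kernels above all share one overlap pattern: start the
   scatter of the needed ghost values, apply the purely local diagonal block
   while the messages are in flight, then complete the scatter and apply the
   off-diagonal block. A sketch of the forward product y = A*x, mirroring
   MatMult_MPIAIJ() above:

       ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);              <-- local work hides communication
       ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);   <-- needs the ghost values
*/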
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* rank 0 needs as much space as the largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ((col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 
ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if (garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 
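/* dispatch: the ASCII info formats print per-process statistics only; binary output with one process delegates to the SeqAIJ viewer and otherwise uses MatView_MPIAIJ_Binary(); printing or drawing the actual entries gathers the entire matrix onto process 0 below */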
1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. 
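This is not memory scalable: process 0 temporarily holds a copy of the whole matrix, which is why ASCII output of the entries is refused above for more than 1024 rows unless -mat_ascii_output_large is given.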
*/ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = 
mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatSetOption_MPIAIJ" 1785 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1786 { 1787 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 switch (op) { 1792 case MAT_NEW_NONZERO_LOCATIONS: 1793 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1794 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1795 case MAT_KEEP_NONZERO_PATTERN: 1796 case MAT_NEW_NONZERO_LOCATION_ERR: 1797 case MAT_USE_INODES: 1798 case MAT_IGNORE_ZERO_ENTRIES: 1799 MatCheckPreallocated(A,1); 1800 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1801 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1802 break; 1803 case MAT_ROW_ORIENTED: 1804 a->roworiented = flg; 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_NEW_DIAGONALS: 1809 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1810 break; 1811 case MAT_IGNORE_OFF_PROC_ENTRIES: 1812 a->donotstash = flg; 1813 break; 1814 case MAT_SPD: 1815 A->spd_set = PETSC_TRUE; 1816 A->spd = flg; 1817 if (flg) { 1818 A->symmetric = PETSC_TRUE; 1819 A->structurally_symmetric = PETSC_TRUE; 1820 A->symmetric_set = PETSC_TRUE; 1821 A->structurally_symmetric_set = PETSC_TRUE; 1822 } 1823 break; 1824 case MAT_SYMMETRIC: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_HERMITIAN: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_SYMMETRY_ETERNAL: 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 default: 1837 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1838 } 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatGetRow_MPIAIJ" 1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1845 { 1846 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1847 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1848 PetscErrorCode ierr; 1849 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1850 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1851 PetscInt *cmap,*idx_p; 1852 1853 PetscFunctionBegin; 1854 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1855 mat->getrowactive = PETSC_TRUE; 1856 1857 if (!mat->rowvalues && (idx || v)) { 1858 /* 1859 allocate enough space to hold information from the longest row. 1860 */ 1861 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1862 PetscInt max = 1,tmp; 1863 for (i=0; i<matin->rmap->n; i++) { 1864 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1865 if (max < tmp) { max = tmp; } 1866 } 1867 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1868 } 1869 1870 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1871 lrow = row - rstart; 1872 1873 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1874 if (!v) {pvA = 0; pvB = 0;} 1875 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1876 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1877 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1878 nztot = nzA + nzB; 1879 1880 cmap = mat->garray; 1881 if (v || idx) { 1882 if (nztot) { 1883 /* Sort by increasing column numbers, assuming A and B already sorted */ 1884 PetscInt imark = -1; 1885 if (v) { 1886 *v = v_p = mat->rowvalues; 1887 for (i=0; i<nzB; i++) { 1888 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1889 else break; 1890 } 1891 imark = i; 1892 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1893 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1894 } 1895 if (idx) { 1896 *idx = idx_p = mat->rowindices; 1897 if (imark > -1) { 1898 for (i=0; i<imark; i++) { 1899 idx_p[i] = cmap[cworkB[i]]; 1900 } 1901 } else { 1902 for (i=0; i<nzB; i++) { 1903 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1904 else break; 1905 } 1906 imark = i; 1907 } 1908 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1909 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1910 } 1911 } else { 1912 if (idx) *idx = 0; 1913 if (v) *v = 0; 1914 } 1915 } 1916 *nz = nztot; 1917 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1918 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1924 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 1928 PetscFunctionBegin; 1929 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1930 aij->getrowactive = PETSC_FALSE; 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatNorm_MPIAIJ" 1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,cstart = mat->cmap->rstart; 1942 PetscReal sum = 0.0; 1943 MatScalar *v; 1944 1945 PetscFunctionBegin; 1946 if (aij->size == 1) { 1947 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1948 } 
else { 1949 if (type == NORM_FROBENIUS) { 1950 v = amat->a; 1951 for (i=0; i<amat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 v = bmat->a; 1959 for (i=0; i<bmat->nz; i++) { 1960 #if defined(PETSC_USE_COMPLEX) 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 #else 1963 sum += (*v)*(*v); v++; 1964 #endif 1965 } 1966 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1967 *norm = PetscSqrtReal(*norm); 1968 } else if (type == NORM_1) { /* max column norm */ 1969 PetscReal *tmp,*tmp2; 1970 PetscInt *jj,*garray = aij->garray; 1971 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1972 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1973 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1974 *norm = 0.0; 1975 v = amat->a; jj = amat->j; 1976 for (j=0; j<amat->nz; j++) { 1977 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1978 } 1979 v = bmat->a; jj = bmat->j; 1980 for (j=0; j<bmat->nz; j++) { 1981 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1982 } 1983 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1984 for (j=0; j<mat->cmap->N; j++) { 1985 if (tmp2[j] > *norm) *norm = tmp2[j]; 1986 } 1987 ierr = PetscFree(tmp);CHKERRQ(ierr); 1988 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1989 } else if (type == NORM_INFINITY) { /* max row norm */ 1990 PetscReal ntemp = 0.0; 1991 for (j=0; j<aij->A->rmap->n; j++) { 1992 v = amat->a + amat->i[j]; 1993 sum = 0.0; 1994 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1995 sum += PetscAbsScalar(*v); v++; 1996 } 1997 v = bmat->a + bmat->i[j]; 1998 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1999 sum += PetscAbsScalar(*v); v++; 2000 } 2001 if (sum > ntemp) ntemp = sum; 2002 } 2003 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2004 } else { 2005 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2006 } 2007 } 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatTranspose_MPIAIJ" 2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2016 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2017 PetscErrorCode ierr; 2018 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2019 PetscInt cstart=A->cmap->rstart,ncol; 2020 Mat B; 2021 MatScalar *array; 2022 2023 PetscFunctionBegin; 2024 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2025 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2031 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2032 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (i=0; i<ai[ma]; i++){ 2034 d_nnz[aj[i]] ++; 2035 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2036 } 2037 2038 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2039 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2040 ierr = 
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2041 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2042 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2044 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2045 } else { 2046 B = *matout; 2047 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2048 for (i=0; i<ai[ma]; i++){ 2049 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2050 } 2051 } 2052 2053 /* copy over the A part */ 2054 array = Aloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<ma; i++) { 2057 ncol = ai[i+1]-ai[i]; 2058 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2059 row++; array += ncol; aj += ncol; 2060 } 2061 aj = Aloc->j; 2062 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2063 2064 /* copy over the B part */ 2065 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2066 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2067 array = Bloc->a; 2068 row = A->rmap->rstart; 2069 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2070 cols_tmp = cols; 2071 for (i=0; i<mb; i++) { 2072 ncol = bi[i+1]-bi[i]; 2073 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2074 row++; array += ncol; cols_tmp += ncol; 2075 } 2076 ierr = PetscFree(cols);CHKERRQ(ierr); 2077 2078 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2081 *matout = B; 2082 } else { 2083 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2091 { 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2093 Mat a = aij->A,b = aij->B; 2094 PetscErrorCode ierr; 2095 PetscInt s1,s2,s3; 2096 2097 PetscFunctionBegin; 2098 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2099 if (rr) { 2100 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2101 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2102 /* Overlap communication with computation. 
*/ 2103 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 } 2105 if (ll) { 2106 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2107 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2108 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2109 } 2110 /* scale the diagonal block */ 2111 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2112 2113 if (rr) { 2114 /* Do a scatter end and then right scale the off-diagonal block */ 2115 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2117 } 2118 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatEqual_MPIAIJ" 2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2137 { 2138 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2139 Mat a,b,c,d; 2140 PetscBool flg; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 a = matA->A; b = matA->B; 2145 c = matB->A; d = matB->B; 2146 2147 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2148 if (flg) { 2149 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2150 } 2151 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatCopy_MPIAIJ" 2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2158 { 2159 PetscErrorCode ierr; 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2162 2163 PetscFunctionBegin; 2164 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2165 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2166 /* because of the column compression in the off-processor part of the matrix a->B, 2167 the number of columns in a->B and b->B may be different, hence we cannot call 2168 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2169 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2170 then copying the submatrices */ 2171 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2172 } else { 2173 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2174 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatSetUp_MPIAIJ" 2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2182 { 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2192 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2193 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2194 { 2195 PetscInt i,m=Y->rmap->N; 2196 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2197 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2198 const PetscInt *xi = x->i,*yi = y->i; 2199 2200 PetscFunctionBegin; 2201 /* Set the number of nonzeros in the new matrix */ 2202 for(i=0; i<m; i++) { 2203 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2204 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2205 nnz[i] = 0; 2206 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2207 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2208 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2209 nnz[i]++; 2210 } 2211 for (; k<nzy; k++) nnz[i]++; 2212 } 2213 PetscFunctionReturn(0); 2214 } 2215 2216 #undef __FUNCT__ 2217 #define __FUNCT__ "MatAXPY_MPIAIJ" 2218 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2219 { 2220 PetscErrorCode ierr; 2221 PetscInt i; 2222 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2223 PetscBLASInt bnz,one=1; 2224 Mat_SeqAIJ *x,*y; 2225 2226 PetscFunctionBegin; 2227 if (str == SAME_NONZERO_PATTERN) { 2228 PetscScalar alpha = a; 2229 x = (Mat_SeqAIJ *)xx->A->data; 2230 y = (Mat_SeqAIJ *)yy->A->data; 2231 bnz = PetscBLASIntCast(x->nz); 2232 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2233 x = (Mat_SeqAIJ *)xx->B->data; 2234 y = (Mat_SeqAIJ *)yy->B->data; 2235 bnz = PetscBLASIntCast(x->nz); 2236 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2237 } else if (str == SUBSET_NONZERO_PATTERN) { 2238 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2239 2240 x = (Mat_SeqAIJ *)xx->B->data; 2241 y = (Mat_SeqAIJ *)yy->B->data; 2242 if (y->xtoy && y->XtoY != xx->B) { 2243 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2244 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2245 } 2246 if (!y->xtoy) { /* get xtoy */ 2247 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2248 y->XtoY = xx->B; 2249 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2250 } 2251 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2252 } else { 2253 Mat B; 2254 PetscInt *nnz_d,*nnz_o; 2255 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2256 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2257 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2258 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2259 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2260 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2261 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2262 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2263 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2264 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2265 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2266 ierr = MatHeaderReplace(Y,B); 2267 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2269 } 2270 PetscFunctionReturn(0); 2271 } 2272 2273 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatConjugate_MPIAIJ" 2277 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2278 { 2279 #if defined(PETSC_USE_COMPLEX) 2280 PetscErrorCode ierr; 2281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2282 2283 PetscFunctionBegin; 2284 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2285 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2286 #else 2287 PetscFunctionBegin; 2288 #endif 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatRealPart_MPIAIJ" 2294 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2301 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2307 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2308 { 2309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2314 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #ifdef PETSC_HAVE_PBGL 2319 2320 #include <boost/parallel/mpi/bsp_process_group.hpp> 2321 #include <boost/graph/distributed/ilu_default_graph.hpp> 2322 #include <boost/graph/distributed/ilu_0_block.hpp> 2323 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2324 #include <boost/graph/distributed/petsc/interface.hpp> 2325 #include <boost/multi_array.hpp> 2326 #include <boost/parallel/distributed_property_map->hpp> 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2330 /* 2331 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2332 */ 2333 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2334 { 2335 namespace petsc = boost::distributed::petsc; 2336 2337 namespace graph_dist = boost::graph::distributed; 2338 using boost::graph::distributed::ilu_default::process_group_type; 2339 using boost::graph::ilu_permuted; 2340 2341 PetscBool row_identity, col_identity; 2342 PetscContainer c; 2343 PetscInt m, n, M, N; 2344 PetscErrorCode ierr; 2345 2346 PetscFunctionBegin; 2347 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2348 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2349 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2350 if (!row_identity || !col_identity) { 2351 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2352 } 2353 2354 process_group_type pg; 2355 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2356 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2357 lgraph_type& level_graph = *lgraph_p; 2358 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2359 2360 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2361 ilu_permuted(level_graph); 2362 2363 /* put together the new matrix */ 2364 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2365 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2366 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2367 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2368 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2369 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2370 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 2373 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2374 ierr = PetscContainerSetPointer(c, lgraph_p); 2375 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2376 ierr = PetscContainerDestroy(&c); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2382 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2383 { 2384 PetscFunctionBegin; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatSolve_MPIAIJ" 2390 /* 2391 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2392 */ 2393 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2394 { 2395 namespace graph_dist = boost::graph::distributed; 2396 2397 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2398 lgraph_type* lgraph_p; 2399 PetscContainer c; 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2404 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2405 ierr = VecCopy(b, x);CHKERRQ(ierr); 2406 2407 PetscScalar* array_x; 2408 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2409 PetscInt sx; 2410 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2411 2412 PetscScalar* array_b; 2413 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2414 PetscInt sb; 2415 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2416 2417 lgraph_type& level_graph = *lgraph_p; 2418 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2419 2420 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2421 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2422 ref_x(array_x, boost::extents[num_vertices(graph)]); 2423 2424 typedef boost::iterator_property_map<array_ref_type::iterator, 2425 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2426 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2427 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2428 2429 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2430 2431 PetscFunctionReturn(0); 2432 } 2433 #endif 2434 2435 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2436 PetscInt nzlocal,nsends,nrecvs; 2437 PetscMPIInt *send_rank,*recv_rank; 2438 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2439 PetscScalar *sbuf_a,**rbuf_a; 2440 PetscErrorCode (*Destroy)(Mat); 2441 } Mat_Redundant; 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2445 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2446 { 2447 PetscErrorCode ierr; 2448 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2449 PetscInt i; 2450 2451 PetscFunctionBegin; 2452 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2453 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2454 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2455 for (i=0; i<redund->nrecvs; i++){ 2456 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2457 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2458 } 2459 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2460 ierr = PetscFree(redund);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatDestroy_MatRedundant" 2466 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2467 { 2468 PetscErrorCode ierr; 2469 PetscContainer container; 2470 Mat_Redundant *redund=PETSC_NULL; 2471 2472 PetscFunctionBegin; 2473 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2474 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2475 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2476 A->ops->destroy = redund->Destroy; 2477 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2478 if (A->ops->destroy) { 2479 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2486 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2487 { 2488 PetscMPIInt rank,size; 2489 MPI_Comm comm=((PetscObject)mat)->comm; 2490 PetscErrorCode ierr; 2491 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2492 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2493 PetscInt *rowrange=mat->rmap->range; 2494 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2495 Mat A=aij->A,B=aij->B,C=*matredundant; 2496 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2497 PetscScalar *sbuf_a; 2498 PetscInt nzlocal=a->nz+b->nz; 2499 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2500 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2501 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2502 MatScalar *aworkA,*aworkB; 2503 PetscScalar *vals; 2504 PetscMPIInt tag1,tag2,tag3,imdex; 2505 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2506 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2507 MPI_Status recv_status,*send_status; 2508 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2509 PetscInt **rbuf_j=PETSC_NULL; 2510 PetscScalar **rbuf_a=PETSC_NULL; 2511 Mat_Redundant *redund=PETSC_NULL; 2512 PetscContainer container; 2513 2514 PetscFunctionBegin; 2515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2516 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2517 2518 if (reuse == MAT_REUSE_MATRIX) { 2519 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2520 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2521 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2522 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2523 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2524 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2525 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2526 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2527 2528 nsends = redund->nsends; 2529 nrecvs = redund->nrecvs; 2530 send_rank = redund->send_rank; 2531 recv_rank = redund->recv_rank; 2532 sbuf_nz = redund->sbuf_nz; 2533 rbuf_nz = redund->rbuf_nz; 2534 sbuf_j = redund->sbuf_j; 2535 sbuf_a = redund->sbuf_a; 2536 rbuf_j = redund->rbuf_j; 2537 rbuf_a = redund->rbuf_a; 2538 } 2539 2540 if (reuse == MAT_INITIAL_MATRIX){ 2541 PetscMPIInt subrank,subsize; 2542 PetscInt nleftover,np_subcomm; 2543 /* get the destination processors' id send_rank, nsends and nrecvs */ 2544 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2545 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2546 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2547 np_subcomm = size/nsubcomm; 2548 nleftover = size - nsubcomm*np_subcomm; 2549 nsends = 0; nrecvs = 0; 2550 for (i=0; i<size; i++){ /* i=rank*/ 2551 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2552 send_rank[nsends] = i; nsends++; 2553 recv_rank[nrecvs++] = i; 2554 } 2555 } 2556 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2557 i = size-nleftover-1; 2558 j = 0; 2559 while (j < nsubcomm - nleftover){ 2560 send_rank[nsends++] = i; 2561 i--; j++; 2562 } 2563 } 2564 2565 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2566 for (i=0; i<nleftover; i++){ 2567 recv_rank[nrecvs++] = size-nleftover+i; 2568 } 2569 } 2570 2571 /* allocate sbuf_j, sbuf_a */ 2572 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2573 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2574 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2575 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2576 2577 /* copy mat's local entries into the buffers */ 2578 if (reuse == MAT_INITIAL_MATRIX){ 2579 rownz_max = 0; 2580 rptr = sbuf_j; 2581 cols = sbuf_j + rend-rstart + 1; 2582 vals = sbuf_a; 2583 rptr[0] = 0; 2584 for (i=0; i<rend-rstart; i++){ 2585 row = i + rstart; 2586 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2587 ncols = nzA + nzB; 2588 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2589 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2590 /* load the column indices for this row into cols */ 2591 lwrite = 0; 2592 for (l=0; l<nzB; l++) { 2593 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2594 vals[lwrite] = aworkB[l]; 2595 cols[lwrite++] = ctmp; 2596 } 2597 } 2598 for (l=0; l<nzA; l++){ 2599 vals[lwrite] = aworkA[l]; 2600 cols[lwrite++] = cstart + cworkA[l]; 2601 } 2602 for (l=0; l<nzB; l++) { 2603 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2604 vals[lwrite] = aworkB[l]; 2605 cols[lwrite++] = ctmp; 2606 } 2607 } 2608 vals += ncols; 2609 cols += ncols; 2610 rptr[i+1] = rptr[i] + ncols; 2611 if (rownz_max < ncols) rownz_max = ncols; 2612 } 2613 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2614 } else { /* only copy matrix values into sbuf_a */ 2615 rptr = sbuf_j; 2616 vals = sbuf_a; 2617 rptr[0] = 0; 2618 for (i=0; i<rend-rstart; i++){ 2619 row = i + 
rstart; 2620 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2621 ncols = nzA + nzB; 2622 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2623 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2624 lwrite = 0; 2625 for (l=0; l<nzB; l++) { 2626 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2627 } 2628 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2629 for (l=0; l<nzB; l++) { 2630 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2631 } 2632 vals += ncols; 2633 rptr[i+1] = rptr[i] + ncols; 2634 } 2635 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2636 2637 /* send nzlocal to others, and recv other's nzlocal */ 2638 /*--------------------------------------------------*/ 2639 if (reuse == MAT_INITIAL_MATRIX){ 2640 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2641 s_waits2 = s_waits3 + nsends; 2642 s_waits1 = s_waits2 + nsends; 2643 r_waits1 = s_waits1 + nsends; 2644 r_waits2 = r_waits1 + nrecvs; 2645 r_waits3 = r_waits2 + nrecvs; 2646 } else { 2647 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2648 r_waits3 = s_waits3 + nsends; 2649 } 2650 2651 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2652 if (reuse == MAT_INITIAL_MATRIX){ 2653 /* get new tags to keep the communication clean */ 2654 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2655 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2656 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2657 2658 /* post receives of other's nzlocal */ 2659 for (i=0; i<nrecvs; i++){ 2660 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2661 } 2662 /* send nzlocal to others */ 2663 for (i=0; i<nsends; i++){ 2664 sbuf_nz[i] = nzlocal; 2665 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2666 } 2667 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2668 count = nrecvs; 2669 while (count) { 2670 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2671 recv_rank[imdex] = recv_status.MPI_SOURCE; 2672 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2673 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2674 2675 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2676 rbuf_nz[imdex] += i + 2; 2677 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2678 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2679 count--; 2680 } 2681 /* wait on sends of nzlocal */ 2682 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2683 /* send mat->i,j to others, and recv from other's */ 2684 /*------------------------------------------------*/ 2685 for (i=0; i<nsends; i++){ 2686 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2687 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2688 } 2689 /* wait on receives of mat->i,j */ 2690 /*------------------------------*/ 2691 count = nrecvs; 2692 while (count) { 2693 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2694 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2695 count--; 2696 } 2697 /* wait on sends of mat->i,j */ 2698 /*---------------------------*/ 2699 if (nsends) { 2700 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2701 } 2702 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2703 2704 /* post receives, send and receive mat->a */ 2705 /*----------------------------------------*/ 2706 for (imdex=0; imdex<nrecvs; imdex++) { 2707 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2708 } 2709 for (i=0; i<nsends; i++){ 2710 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2711 } 2712 count = nrecvs; 2713 while (count) { 2714 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2715 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2716 count--; 2717 } 2718 if (nsends) { 2719 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2720 } 2721 2722 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2723 2724 /* create redundant matrix */ 2725 /*-------------------------*/ 2726 if (reuse == MAT_INITIAL_MATRIX){ 2727 /* compute rownz_max for preallocation */ 2728 for (imdex=0; imdex<nrecvs; imdex++){ 2729 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2730 rptr = rbuf_j[imdex]; 2731 for (i=0; i<j; i++){ 2732 ncols = rptr[i+1] - rptr[i]; 2733 if (rownz_max < ncols) rownz_max = ncols; 2734 } 2735 } 2736 2737 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2738 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2739 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2740 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2741 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2742 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2743 } else { 2744 C = *matredundant; 2745 } 2746 2747 /* insert local matrix entries */ 2748 rptr = sbuf_j; 2749 cols = sbuf_j + rend-rstart + 1; 2750 vals = sbuf_a; 2751 for (i=0; i<rend-rstart; i++){ 2752 row = i + rstart; 2753 ncols = rptr[i+1] - rptr[i]; 2754 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2755 vals += ncols; 2756 cols += ncols; 2757 } 2758 /* insert received matrix entries */ 2759 for (imdex=0; imdex<nrecvs; imdex++){ 2760 rstart = rowrange[recv_rank[imdex]]; 2761 rend = rowrange[recv_rank[imdex]+1]; 2762 rptr = rbuf_j[imdex]; 2763 cols = rbuf_j[imdex] + rend-rstart + 1; 2764 vals = rbuf_a[imdex]; 2765 for (i=0; i<rend-rstart; i++){ 2766 row = i + rstart; 2767 ncols = rptr[i+1] - rptr[i]; 2768 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2769 vals += ncols; 2770 cols += ncols; 2771 } 2772 } 2773 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2774 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2775 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2776 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2777 if (reuse == MAT_INITIAL_MATRIX) { 2778 PetscContainer container; 2779 *matredundant = C; 2780 /* create a supporting struct and attach it to C for reuse */ 2781 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2782 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2783 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2784 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2785 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2786 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2787 2788 redund->nzlocal = nzlocal; 2789 redund->nsends = nsends; 2790 redund->nrecvs = nrecvs; 2791 redund->send_rank = send_rank; 2792 redund->recv_rank = recv_rank; 2793 redund->sbuf_nz = sbuf_nz; 2794 redund->rbuf_nz = rbuf_nz; 2795 redund->sbuf_j = sbuf_j; 2796 redund->sbuf_a = sbuf_a; 2797 redund->rbuf_j = rbuf_j; 2798 redund->rbuf_a = rbuf_a; 2799 2800 redund->Destroy = C->ops->destroy; 2801 C->ops->destroy = MatDestroy_MatRedundant; 2802 } 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2808 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2809 { 2810 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2811 PetscErrorCode ierr; 2812 PetscInt i,*idxb = 0; 2813 PetscScalar *va,*vb; 2814 Vec vtmp; 2815 2816 PetscFunctionBegin; 2817 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2818 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2819 if (idx) { 2820 for (i=0; i<A->rmap->n; i++) { 2821 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2822 } 2823 } 2824 2825 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2826 if (idx) { 2827 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2828 } 2829 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2830 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2831 2832 for (i=0; i<A->rmap->n; i++){ 2833 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2834 va[i] = vb[i]; 2835 if (idx) idx[i] = a->garray[idxb[i]]; 2836 } 2837 } 2838 2839 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2840 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2841 ierr = PetscFree(idxb);CHKERRQ(ierr); 2842 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2848 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2849 { 2850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 PetscInt i,*idxb = 0; 2853 PetscScalar *va,*vb; 2854 Vec vtmp; 2855 2856 PetscFunctionBegin; 2857 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2858 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2859 if (idx) { 2860 for (i=0; i<A->cmap->n; i++) { 2861 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2862 } 2863 } 2864 2865 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2866 if (idx) { 2867 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2868 } 2869 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2870 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2871 2872 for (i=0; i<A->rmap->n; i++){ 2873 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2874 va[i] = vb[i]; 2875 if (idx) idx[i] = a->garray[idxb[i]]; 2876 } 2877 } 2878 2879 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2880 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2881 ierr = PetscFree(idxb);CHKERRQ(ierr); 2882 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2888 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2889 { 2890 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2891 PetscInt n = A->rmap->n; 2892 PetscInt cstart = 
A->cmap->rstart; 2893 PetscInt *cmap = mat->garray; 2894 PetscInt *diagIdx, *offdiagIdx; 2895 Vec diagV, offdiagV; 2896 PetscScalar *a, *diagA, *offdiagA; 2897 PetscInt r; 2898 PetscErrorCode ierr; 2899 2900 PetscFunctionBegin; 2901 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2902 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2903 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2904 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2905 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2906 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2907 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2908 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2909 for(r = 0; r < n; ++r) { 2910 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2911 a[r] = diagA[r]; 2912 idx[r] = cstart + diagIdx[r]; 2913 } else { 2914 a[r] = offdiagA[r]; 2915 idx[r] = cmap[offdiagIdx[r]]; 2916 } 2917 } 2918 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2919 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2920 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2921 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2922 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2923 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2929 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2930 { 2931 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2932 PetscInt n = A->rmap->n; 2933 PetscInt cstart = A->cmap->rstart; 2934 PetscInt *cmap = mat->garray; 2935 PetscInt *diagIdx, *offdiagIdx; 2936 Vec diagV, offdiagV; 2937 PetscScalar *a, *diagA, *offdiagA; 2938 PetscInt r; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2943 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2944 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2945 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2946 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2947 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 for(r = 0; r < n; ++r) { 2951 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2952 a[r] = diagA[r]; 2953 idx[r] = cstart + diagIdx[r]; 2954 } else { 2955 a[r] = offdiagA[r]; 2956 idx[r] = cmap[offdiagIdx[r]]; 2957 } 2958 } 2959 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2961 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2962 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2963 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2964 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2970 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2971 { 2972 PetscErrorCode ierr; 2973 Mat *dummy; 2974 2975 PetscFunctionBegin; 2976 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2977 *newmat = *dummy; 2978 ierr = PetscFree(dummy);CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2983 2984 #undef __FUNCT__ 
2985 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2986 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2987 { 2988 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2989 PetscErrorCode ierr; 2990 2991 PetscFunctionBegin; 2992 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 2997 /* -------------------------------------------------------------------*/ 2998 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2999 MatGetRow_MPIAIJ, 3000 MatRestoreRow_MPIAIJ, 3001 MatMult_MPIAIJ, 3002 /* 4*/ MatMultAdd_MPIAIJ, 3003 MatMultTranspose_MPIAIJ, 3004 MatMultTransposeAdd_MPIAIJ, 3005 #ifdef PETSC_HAVE_PBGL 3006 MatSolve_MPIAIJ, 3007 #else 3008 0, 3009 #endif 3010 0, 3011 0, 3012 /*10*/ 0, 3013 0, 3014 0, 3015 MatSOR_MPIAIJ, 3016 MatTranspose_MPIAIJ, 3017 /*15*/ MatGetInfo_MPIAIJ, 3018 MatEqual_MPIAIJ, 3019 MatGetDiagonal_MPIAIJ, 3020 MatDiagonalScale_MPIAIJ, 3021 MatNorm_MPIAIJ, 3022 /*20*/ MatAssemblyBegin_MPIAIJ, 3023 MatAssemblyEnd_MPIAIJ, 3024 MatSetOption_MPIAIJ, 3025 MatZeroEntries_MPIAIJ, 3026 /*24*/ MatZeroRows_MPIAIJ, 3027 0, 3028 #ifdef PETSC_HAVE_PBGL 3029 0, 3030 #else 3031 0, 3032 #endif 3033 0, 3034 0, 3035 /*29*/ MatSetUp_MPIAIJ, 3036 #ifdef PETSC_HAVE_PBGL 3037 0, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 0, 3043 0, 3044 /*34*/ MatDuplicate_MPIAIJ, 3045 0, 3046 0, 3047 0, 3048 0, 3049 /*39*/ MatAXPY_MPIAIJ, 3050 MatGetSubMatrices_MPIAIJ, 3051 MatIncreaseOverlap_MPIAIJ, 3052 MatGetValues_MPIAIJ, 3053 MatCopy_MPIAIJ, 3054 /*44*/ MatGetRowMax_MPIAIJ, 3055 MatScale_MPIAIJ, 3056 0, 3057 0, 3058 MatZeroRowsColumns_MPIAIJ, 3059 /*49*/ 0, 3060 0, 3061 0, 3062 0, 3063 0, 3064 /*54*/ MatFDColoringCreate_MPIAIJ, 3065 0, 3066 MatSetUnfactored_MPIAIJ, 3067 0, /* MatPermute_MPIAIJ, impl currently broken */ 3068 0, 3069 /*59*/ MatGetSubMatrix_MPIAIJ, 3070 MatDestroy_MPIAIJ, 3071 MatView_MPIAIJ, 3072 0, 3073 0, 3074 /*64*/ 0, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3080 MatGetRowMinAbs_MPIAIJ, 3081 0, 3082 MatSetColoring_MPIAIJ, 3083 #if defined(PETSC_HAVE_ADIC) 3084 MatSetValuesAdic_MPIAIJ, 3085 #else 3086 0, 3087 #endif 3088 MatSetValuesAdifor_MPIAIJ, 3089 /*75*/ MatFDColoringApply_AIJ, 3090 0, 3091 0, 3092 0, 3093 0, 3094 /*80*/ 0, 3095 0, 3096 0, 3097 /*83*/ MatLoad_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 0, 3103 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3104 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3105 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3106 MatPtAP_Basic, 3107 MatPtAPSymbolic_MPIAIJ, 3108 /*94*/ MatPtAPNumeric_MPIAIJ, 3109 0, 3110 0, 3111 0, 3112 0, 3113 /*99*/ 0, 3114 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3115 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3116 MatConjugate_MPIAIJ, 3117 0, 3118 /*104*/MatSetValuesRow_MPIAIJ, 3119 MatRealPart_MPIAIJ, 3120 MatImaginaryPart_MPIAIJ, 3121 0, 3122 0, 3123 /*109*/0, 3124 MatGetRedundantMatrix_MPIAIJ, 3125 MatGetRowMin_MPIAIJ, 3126 0, 3127 0, 3128 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3129 0, 3130 0, 3131 0, 3132 0, 3133 /*119*/0, 3134 0, 3135 0, 3136 0, 3137 MatGetMultiProcBlock_MPIAIJ, 3138 /*124*/MatFindNonZeroRows_MPIAIJ, 3139 MatGetColumnNorms_MPIAIJ, 3140 MatInvertBlockDiagonal_MPIAIJ, 3141 0, 3142 MatGetSubMatricesParallel_MPIAIJ, 3143 /*129*/0, 3144 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3145 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3146 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3147 0, 3148 /*134*/0, 3149 0, 3150 0, 3151 0, 3152 0 3153 }; 3154 3155 /* ----------------------------------------------------------------------------------------*/ 3156 3157 
EXTERN_C_BEGIN 3158 #undef __FUNCT__ 3159 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3160 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3161 { 3162 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3163 PetscErrorCode ierr; 3164 3165 PetscFunctionBegin; 3166 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3167 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 EXTERN_C_END 3171 3172 EXTERN_C_BEGIN 3173 #undef __FUNCT__ 3174 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3175 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3176 { 3177 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3178 PetscErrorCode ierr; 3179 3180 PetscFunctionBegin; 3181 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3182 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3183 PetscFunctionReturn(0); 3184 } 3185 EXTERN_C_END 3186 3187 EXTERN_C_BEGIN 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3190 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3191 { 3192 Mat_MPIAIJ *b; 3193 PetscErrorCode ierr; 3194 PetscInt i; 3195 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3196 3197 PetscFunctionBegin; 3198 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3199 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3200 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3201 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3202 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3203 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3204 3205 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3206 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3207 if (d_nnz) { 3208 for (i=0; i<B->rmap->n; i++) { 3209 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3210 } 3211 } 3212 if (o_nnz) { 3213 for (i=0; i<B->rmap->n; i++) { 3214 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3215 } 3216 } 3217 b = (Mat_MPIAIJ*)B->data; 3218 3219 if (!B->preallocated) { 3220 /* Explicitly create 2 MATSEQAIJ matrices. */ 3221 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3222 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3223 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3224 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3225 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3226 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3227 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3228 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3229 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3230 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3231 } 3232 3233 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3234 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3235 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3236 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3237 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3238 B->preallocated = PETSC_TRUE; 3239 PetscFunctionReturn(0); 3240 } 3241 EXTERN_C_END 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3245 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3246 { 3247 Mat mat; 3248 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 *newmat = 0; 3253 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3254 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3255 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3256 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3257 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3258 a = (Mat_MPIAIJ*)mat->data; 3259 3260 mat->factortype = matin->factortype; 3261 mat->rmap->bs = matin->rmap->bs; 3262 mat->cmap->bs = matin->cmap->bs; 3263 mat->assembled = PETSC_TRUE; 3264 mat->insertmode = NOT_SET_VALUES; 3265 mat->preallocated = PETSC_TRUE; 3266 3267 a->size = oldmat->size; 3268 a->rank = oldmat->rank; 3269 a->donotstash = oldmat->donotstash; 3270 a->roworiented = oldmat->roworiented; 3271 a->rowindices = 0; 3272 a->rowvalues = 0; 3273 a->getrowactive = PETSC_FALSE; 3274 3275 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3276 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3277 3278 if (oldmat->colmap) { 3279 #if defined (PETSC_USE_CTABLE) 3280 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3281 #else 3282 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3283 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3284 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3285 #endif 3286 } else a->colmap = 0; 3287 if (oldmat->garray) { 3288 PetscInt len; 3289 len = oldmat->B->cmap->n; 3290 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3291 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3292 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3293 } else a->garray = 0; 3294 3295 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3296 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3297 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3298 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3299 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3300 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3301 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3302 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3303 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3304 *newmat = mat; 3305 PetscFunctionReturn(0); 3306 } 3307 3308 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "MatLoad_MPIAIJ" 3312 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3313 { 3314 PetscScalar *vals,*svals; 3315 MPI_Comm comm = ((PetscObject)viewer)->comm; 3316 PetscErrorCode ierr; 3317 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3318 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3319 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3320 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3321 PetscInt cend,cstart,n,*rowners,sizesset=1; 3322 int fd; 3323 PetscInt bs = 1; 3324 3325 PetscFunctionBegin; 3326 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3327 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3328 if (!rank) { 3329 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3330 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3331 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3332 } 3333 3334 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3335 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3336 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3337 3338 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3339 3340 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3341 M = header[1]; N = header[2]; 3342 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3343 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3344 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3345 3346 /* If global sizes are set, check if they are consistent with that given in the file */ 3347 if (sizesset) { 3348 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3349 } 3350 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3351 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3352 3353 /* determine ownership of all (block) rows */ 3354 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3355 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3356 else m = newMat->rmap->n; /* Set by user */ 3357 3358 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3359 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3360 3361 /* First process needs enough room for process with most rows */ 3362 if (!rank) { 3363 mmax = rowners[1]; 3364 for (i=2; i<size; i++) { 3365 mmax = PetscMax(mmax,rowners[i]); 3366 } 3367 } else mmax = m; 3368 3369 rowners[0] = 0; 3370 for (i=2; i<=size; i++) { 3371 rowners[i] += rowners[i-1]; 3372 } 3373 rstart = rowners[rank]; 3374 rend = rowners[rank+1]; 3375 3376 /* distribute row lengths to all processors */ 3377 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3378 if (!rank) { 3379 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3380 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3381 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3382 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3383 for (j=0; j<m; j++) { 3384 procsnz[0] += ourlens[j]; 3385 } 3386 for (i=1; i<size; i++) { 3387 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3388 /* calculate the number of nonzeros on each processor */ 3389 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3390 procsnz[i] += 
rowlengths[j]; 3391 } 3392 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3393 } 3394 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3395 } else { 3396 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3397 } 3398 3399 if (!rank) { 3400 /* determine max buffer needed and allocate it */ 3401 maxnz = 0; 3402 for (i=0; i<size; i++) { 3403 maxnz = PetscMax(maxnz,procsnz[i]); 3404 } 3405 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3406 3407 /* read in my part of the matrix column indices */ 3408 nz = procsnz[0]; 3409 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3410 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3411 3412 /* read in every one elses and ship off */ 3413 for (i=1; i<size; i++) { 3414 nz = procsnz[i]; 3415 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3416 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3417 } 3418 ierr = PetscFree(cols);CHKERRQ(ierr); 3419 } else { 3420 /* determine buffer space needed for message */ 3421 nz = 0; 3422 for (i=0; i<m; i++) { 3423 nz += ourlens[i]; 3424 } 3425 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3426 3427 /* receive message of column indices*/ 3428 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3429 } 3430 3431 /* determine column ownership if matrix is not square */ 3432 if (N != M) { 3433 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3434 else n = newMat->cmap->n; 3435 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3436 cstart = cend - n; 3437 } else { 3438 cstart = rstart; 3439 cend = rend; 3440 n = cend - cstart; 3441 } 3442 3443 /* loop over local rows, determining number of off diagonal entries */ 3444 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3445 jj = 0; 3446 for (i=0; i<m; i++) { 3447 for (j=0; j<ourlens[i]; j++) { 3448 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3449 jj++; 3450 } 3451 } 3452 3453 for (i=0; i<m; i++) { 3454 ourlens[i] -= offlens[i]; 3455 } 3456 if (!sizesset) { 3457 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3458 } 3459 3460 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3461 3462 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3463 3464 for (i=0; i<m; i++) { 3465 ourlens[i] += offlens[i]; 3466 } 3467 3468 if (!rank) { 3469 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3470 3471 /* read in my part of the matrix numerical values */ 3472 nz = procsnz[0]; 3473 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3474 3475 /* insert into matrix */ 3476 jj = rstart; 3477 smycols = mycols; 3478 svals = vals; 3479 for (i=0; i<m; i++) { 3480 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3481 smycols += ourlens[i]; 3482 svals += ourlens[i]; 3483 jj++; 3484 } 3485 3486 /* read in other processors and ship out */ 3487 for (i=1; i<size; i++) { 3488 nz = procsnz[i]; 3489 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3490 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3491 } 3492 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3493 } else { 3494 /* receive numeric values */ 3495 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3496 3497 /* receive message of values*/ 3498 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3499 3500 /* 
insert into matrix */ 3501 jj = rstart; 3502 smycols = mycols; 3503 svals = vals; 3504 for (i=0; i<m; i++) { 3505 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3506 smycols += ourlens[i]; 3507 svals += ourlens[i]; 3508 jj++; 3509 } 3510 } 3511 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3512 ierr = PetscFree(vals);CHKERRQ(ierr); 3513 ierr = PetscFree(mycols);CHKERRQ(ierr); 3514 ierr = PetscFree(rowners);CHKERRQ(ierr); 3515 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3516 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 3518 PetscFunctionReturn(0); 3519 } 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3523 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3524 { 3525 PetscErrorCode ierr; 3526 IS iscol_local; 3527 PetscInt csize; 3528 3529 PetscFunctionBegin; 3530 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3531 if (call == MAT_REUSE_MATRIX) { 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3533 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3534 } else { 3535 PetscInt cbs; 3536 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3537 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3538 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3539 } 3540 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3541 if (call == MAT_INITIAL_MATRIX) { 3542 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3543 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3544 } 3545 PetscFunctionReturn(0); 3546 } 3547 3548 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3549 #undef __FUNCT__ 3550 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3551 /* 3552 Not great since it makes two copies of the submatrix, first an SeqAIJ 3553 in local and then by concatenating the local matrices the end result. 3554 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3555 3556 Note: This requires a sequential iscol with all indices. 
3557 */ 3558 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3559 { 3560 PetscErrorCode ierr; 3561 PetscMPIInt rank,size; 3562 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3563 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3564 PetscBool allcolumns, colflag; 3565 Mat M,Mreuse; 3566 MatScalar *vwork,*aa; 3567 MPI_Comm comm = ((PetscObject)mat)->comm; 3568 Mat_SeqAIJ *aij; 3569 3570 3571 PetscFunctionBegin; 3572 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3574 3575 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3576 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3577 if (colflag && ncol == mat->cmap->N){ 3578 allcolumns = PETSC_TRUE; 3579 } else { 3580 allcolumns = PETSC_FALSE; 3581 } 3582 if (call == MAT_REUSE_MATRIX) { 3583 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3584 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3585 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3586 } else { 3587 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3588 } 3589 3590 /* 3591 m - number of local rows 3592 n - number of columns (same on all processors) 3593 rstart - first row in new global matrix generated 3594 */ 3595 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3596 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3597 if (call == MAT_INITIAL_MATRIX) { 3598 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3599 ii = aij->i; 3600 jj = aij->j; 3601 3602 /* 3603 Determine the number of non-zeros in the diagonal and off-diagonal 3604 portions of the matrix in order to do correct preallocation 3605 */ 3606 3607 /* first get start and end of "diagonal" columns */ 3608 if (csize == PETSC_DECIDE) { 3609 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3610 if (mglobal == n) { /* square matrix */ 3611 nlocal = m; 3612 } else { 3613 nlocal = n/size + ((n % size) > rank); 3614 } 3615 } else { 3616 nlocal = csize; 3617 } 3618 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3619 rstart = rend - nlocal; 3620 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3621 3622 /* next, compute all the lengths */ 3623 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3624 olens = dlens + m; 3625 for (i=0; i<m; i++) { 3626 jend = ii[i+1] - ii[i]; 3627 olen = 0; 3628 dlen = 0; 3629 for (j=0; j<jend; j++) { 3630 if (*jj < rstart || *jj >= rend) olen++; 3631 else dlen++; 3632 jj++; 3633 } 3634 olens[i] = olen; 3635 dlens[i] = dlen; 3636 } 3637 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3638 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3639 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3640 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3641 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3642 ierr = PetscFree(dlens);CHKERRQ(ierr); 3643 } else { 3644 PetscInt ml,nl; 3645 3646 M = *newmat; 3647 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3648 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3649 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3650 /* 3651 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3652 rather than the slower MatSetValues(). 3653 */ 3654 M->was_assembled = PETSC_TRUE; 3655 M->assembled = PETSC_FALSE; 3656 } 3657 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3658 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3659 ii = aij->i; 3660 jj = aij->j; 3661 aa = aij->a; 3662 for (i=0; i<m; i++) { 3663 row = rstart + i; 3664 nz = ii[i+1] - ii[i]; 3665 cwork = jj; jj += nz; 3666 vwork = aa; aa += nz; 3667 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3668 } 3669 3670 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3671 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3672 *newmat = M; 3673 3674 /* save submatrix used in processor for next request */ 3675 if (call == MAT_INITIAL_MATRIX) { 3676 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3677 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3678 } 3679 3680 PetscFunctionReturn(0); 3681 } 3682 3683 EXTERN_C_BEGIN 3684 #undef __FUNCT__ 3685 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3686 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3687 { 3688 PetscInt m,cstart, cend,j,nnz,i,d; 3689 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3690 const PetscInt *JJ; 3691 PetscScalar *values; 3692 PetscErrorCode ierr; 3693 3694 PetscFunctionBegin; 3695 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3696 3697 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3698 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3699 m = B->rmap->n; 3700 cstart = B->cmap->rstart; 3701 cend = B->cmap->rend; 3702 rstart = B->rmap->rstart; 3703 3704 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3705 3706 #if defined(PETSC_USE_DEBUGGING) 3707 for (i=0; i<m; i++) { 3708 nnz = Ii[i+1]- Ii[i]; 3709 JJ = J + Ii[i]; 3710 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3711 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3712 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3713 } 3714 #endif 3715 3716 for (i=0; i<m; i++) { 3717 nnz = Ii[i+1]- Ii[i]; 3718 JJ = J + Ii[i]; 3719 nnz_max = PetscMax(nnz_max,nnz); 3720 d = 0; 3721 for (j=0; j<nnz; j++) { 3722 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3723 } 3724 d_nnz[i] = d; 3725 o_nnz[i] = nnz - d; 3726 } 3727 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3728 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3729 3730 if (v) values = (PetscScalar*)v; 3731 else { 3732 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3733 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3734 } 3735 3736 for (i=0; i<m; i++) { 3737 ii = i + rstart; 3738 nnz = Ii[i+1]- Ii[i]; 3739 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3740 } 3741 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 3744 if (!v) { 3745 ierr = PetscFree(values);CHKERRQ(ierr); 3746 } 3747 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3748 PetscFunctionReturn(0); 3749 } 3750 EXTERN_C_END 3751 3752 #undef __FUNCT__ 3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3754 /*@ 3755 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3756 (the default parallel PETSc format). 3757 3758 Collective on MPI_Comm 3759 3760 Input Parameters: 3761 + B - the matrix 3762 . i - the indices into j for the start of each local row (starts with zero) 3763 . j - the column indices for each local row (starts with zero) 3764 - v - optional values in the matrix 3765 3766 Level: developer 3767 3768 Notes: 3769 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3770 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3771 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3772 3773 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3774 3775 The format which is used for the sparse matrix input, is equivalent to a 3776 row-major ordering.. i.e for the following matrix, the input data expected is 3777 as shown: 3778 3779 1 0 0 3780 2 0 3 P0 3781 ------- 3782 4 5 6 P1 3783 3784 Process0 [P0]: rows_owned=[0,1] 3785 i = {0,1,3} [size = nrow+1 = 2+1] 3786 j = {0,0,2} [size = nz = 6] 3787 v = {1,2,3} [size = nz = 6] 3788 3789 Process1 [P1]: rows_owned=[2] 3790 i = {0,3} [size = nrow+1 = 1+1] 3791 j = {0,1,2} [size = nz = 6] 3792 v = {4,5,6} [size = nz = 6] 3793 3794 .keywords: matrix, aij, compressed row, sparse, parallel 3795 3796 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3797 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3798 @*/ 3799 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3800 { 3801 PetscErrorCode ierr; 3802 3803 PetscFunctionBegin; 3804 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3805 PetscFunctionReturn(0); 3806 } 3807 3808 #undef __FUNCT__ 3809 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3810 /*@C 3811 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3812 (the default parallel PETSc format). For good matrix assembly performance 3813 the user should preallocate the matrix storage by setting the parameters 3814 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3815 performance can be increased by more than a factor of 50. 3816 3817 Collective on MPI_Comm 3818 3819 Input Parameters: 3820 + A - the matrix 3821 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3822 (same value is used for all local rows) 3823 . d_nnz - array containing the number of nonzeros in the various rows of the 3824 DIAGONAL portion of the local submatrix (possibly different for each row) 3825 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3826 The size of this array is equal to the number of local rows, i.e 'm'. 
3827 For matrices that will be factored, you must leave room for (and set) 3828 the diagonal entry even if it is zero. 3829 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3830 submatrix (same value is used for all local rows). 3831 - o_nnz - array containing the number of nonzeros in the various rows of the 3832 OFF-DIAGONAL portion of the local submatrix (possibly different for 3833 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3834 structure. The size of this array is equal to the number 3835 of local rows, i.e 'm'. 3836 3837 If the *_nnz parameter is given then the *_nz parameter is ignored 3838 3839 The AIJ format (also called the Yale sparse matrix format or 3840 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3841 storage. The stored row and column indices begin with zero. 3842 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3843 3844 The parallel matrix is partitioned such that the first m0 rows belong to 3845 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3846 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3847 3848 The DIAGONAL portion of the local submatrix of a processor can be defined 3849 as the submatrix which is obtained by extraction the part corresponding to 3850 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3851 first row that belongs to the processor, r2 is the last row belonging to 3852 the this processor, and c1-c2 is range of indices of the local part of a 3853 vector suitable for applying the matrix to. This is an mxn matrix. In the 3854 common case of a square matrix, the row and column ranges are the same and 3855 the DIAGONAL part is also square. The remaining portion of the local 3856 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3857 3858 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3859 3860 You can call MatGetInfo() to get information on how effective the preallocation was; 3861 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3862 You can also run with the option -info and look for messages with the string 3863 malloc in them to see if additional memory allocation was needed. 3864 3865 Example usage: 3866 3867 Consider the following 8x8 matrix with 34 non-zero values, that is 3868 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3869 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3870 as follows: 3871 3872 .vb 3873 1 2 0 | 0 3 0 | 0 4 3874 Proc0 0 5 6 | 7 0 0 | 8 0 3875 9 0 10 | 11 0 0 | 12 0 3876 ------------------------------------- 3877 13 0 14 | 15 16 17 | 0 0 3878 Proc1 0 18 0 | 19 20 21 | 0 0 3879 0 0 0 | 22 23 0 | 24 0 3880 ------------------------------------- 3881 Proc2 25 26 27 | 0 0 28 | 29 0 3882 30 0 0 | 31 32 33 | 0 34 3883 .ve 3884 3885 This can be represented as a collection of submatrices as: 3886 3887 .vb 3888 A B C 3889 D E F 3890 G H I 3891 .ve 3892 3893 Where the submatrices A,B,C are owned by proc0, D,E,F are 3894 owned by proc1, G,H,I are owned by proc2. 3895 3896 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3897 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3898 The 'M','N' parameters are 8,8, and have the same values on all procs. 3899 3900 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3901 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3902 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3903 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3904 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3905 matrix, ans [DF] as another SeqAIJ matrix. 3906 3907 When d_nz, o_nz parameters are specified, d_nz storage elements are 3908 allocated for every row of the local diagonal submatrix, and o_nz 3909 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3910 One way to choose d_nz and o_nz is to use the max nonzerors per local 3911 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3912 In this case, the values of d_nz,o_nz are: 3913 .vb 3914 proc0 : dnz = 2, o_nz = 2 3915 proc1 : dnz = 3, o_nz = 2 3916 proc2 : dnz = 1, o_nz = 4 3917 .ve 3918 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3919 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3920 for proc3. i.e we are using 12+15+10=37 storage locations to store 3921 34 values. 3922 3923 When d_nnz, o_nnz parameters are specified, the storage is specified 3924 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3925 In the above case the values for d_nnz,o_nnz are: 3926 .vb 3927 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3928 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3929 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3930 .ve 3931 Here the space allocated is sum of all the above values i.e 34, and 3932 hence pre-allocation is perfect. 3933 3934 Level: intermediate 3935 3936 .keywords: matrix, aij, compressed row, sparse, parallel 3937 3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3939 MPIAIJ, MatGetInfo() 3940 @*/ 3941 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3942 { 3943 PetscErrorCode ierr; 3944 3945 PetscFunctionBegin; 3946 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3947 PetscValidType(B,1); 3948 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3949 PetscFunctionReturn(0); 3950 } 3951 3952 #undef __FUNCT__ 3953 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3954 /*@ 3955 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3956 CSR format the local rows. 3957 3958 Collective on MPI_Comm 3959 3960 Input Parameters: 3961 + comm - MPI communicator 3962 . m - number of local rows (Cannot be PETSC_DECIDE) 3963 . n - This value should be the same as the local size used in creating the 3964 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3965 calculated if N is given) For square matrices n is almost always m. 3966 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3967 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3968 . i - row indices 3969 . j - column indices 3970 - a - matrix values 3971 3972 Output Parameter: 3973 . mat - the matrix 3974 3975 Level: intermediate 3976 3977 Notes: 3978 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3979 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3980 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3981 3982 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3983 3984 The format which is used for the sparse matrix input, is equivalent to a 3985 row-major ordering.. i.e for the following matrix, the input data expected is 3986 as shown: 3987 3988 1 0 0 3989 2 0 3 P0 3990 ------- 3991 4 5 6 P1 3992 3993 Process0 [P0]: rows_owned=[0,1] 3994 i = {0,1,3} [size = nrow+1 = 2+1] 3995 j = {0,0,2} [size = nz = 6] 3996 v = {1,2,3} [size = nz = 6] 3997 3998 Process1 [P1]: rows_owned=[2] 3999 i = {0,3} [size = nrow+1 = 1+1] 4000 j = {0,1,2} [size = nz = 6] 4001 v = {4,5,6} [size = nz = 6] 4002 4003 .keywords: matrix, aij, compressed row, sparse, parallel 4004 4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4006 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4007 @*/ 4008 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4009 { 4010 PetscErrorCode ierr; 4011 4012 PetscFunctionBegin; 4013 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4014 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4015 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4016 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4017 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4018 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4019 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4020 PetscFunctionReturn(0); 4021 } 4022 4023 #undef __FUNCT__ 4024 #define __FUNCT__ "MatCreateAIJ" 4025 /*@C 4026 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4027 (the default parallel PETSc format). For good matrix assembly performance 4028 the user should preallocate the matrix storage by setting the parameters 4029 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4030 performance can be increased by more than a factor of 50. 4031 4032 Collective on MPI_Comm 4033 4034 Input Parameters: 4035 + comm - MPI communicator 4036 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4037 This value should be the same as the local size used in creating the 4038 y vector for the matrix-vector product y = Ax. 4039 . n - This value should be the same as the local size used in creating the 4040 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4041 calculated if N is given) For square matrices n is almost always m. 4042 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4043 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4044 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4045 (same value is used for all local rows) 4046 . d_nnz - array containing the number of nonzeros in the various rows of the 4047 DIAGONAL portion of the local submatrix (possibly different for each row) 4048 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4049 The size of this array is equal to the number of local rows, i.e 'm'. 4050 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4051 submatrix (same value is used for all local rows). 
4052 - o_nnz - array containing the number of nonzeros in the various rows of the 4053 OFF-DIAGONAL portion of the local submatrix (possibly different for 4054 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4055 structure. The size of this array is equal to the number 4056 of local rows, i.e 'm'. 4057 4058 Output Parameter: 4059 . A - the matrix 4060 4061 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4062 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4063 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4064 4065 Notes: 4066 If the *_nnz parameter is given then the *_nz parameter is ignored 4067 4068 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4069 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4070 storage requirements for this matrix. 4071 4072 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4073 processor than it must be used on all processors that share the object for 4074 that argument. 4075 4076 The user MUST specify either the local or global matrix dimensions 4077 (possibly both). 4078 4079 The parallel matrix is partitioned across processors such that the 4080 first m0 rows belong to process 0, the next m1 rows belong to 4081 process 1, the next m2 rows belong to process 2 etc.. where 4082 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4083 values corresponding to [m x N] submatrix. 4084 4085 The columns are logically partitioned with the n0 columns belonging 4086 to 0th partition, the next n1 columns belonging to the next 4087 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4088 4089 The DIAGONAL portion of the local submatrix on any given processor 4090 is the submatrix corresponding to the rows and columns m,n 4091 corresponding to the given processor. i.e diagonal matrix on 4092 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4093 etc. The remaining portion of the local submatrix [m x (N-n)] 4094 constitute the OFF-DIAGONAL portion. The example below better 4095 illustrates this concept. 4096 4097 For a square global matrix we define each processor's diagonal portion 4098 to be its local rows and the corresponding columns (a square submatrix); 4099 each processor's off-diagonal portion encompasses the remainder of the 4100 local matrix (a rectangular submatrix). 4101 4102 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4103 4104 When calling this routine with a single process communicator, a matrix of 4105 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4106 type of communicator, use the construction mechanism: 4107 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4108 4109 By default, this format uses inodes (identical nodes) when possible. 4110 We search for consecutive rows with the same nonzero structure, thereby 4111 reusing matrix information to achieve increased efficiency. 4112 4113 Options Database Keys: 4114 + -mat_no_inode - Do not use inodes 4115 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4116 - -mat_aij_oneindex - Internally use indexing starting at 1 4117 rather than 0. Note that when calling MatSetValues(), 4118 the user still MUST index entries starting at 0! 4119 4120 4121 Example usage: 4122 4123 Consider the following 8x8 matrix with 34 non-zero values, that is 4124 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4125 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4126 as follows: 4127 4128 .vb 4129 1 2 0 | 0 3 0 | 0 4 4130 Proc0 0 5 6 | 7 0 0 | 8 0 4131 9 0 10 | 11 0 0 | 12 0 4132 ------------------------------------- 4133 13 0 14 | 15 16 17 | 0 0 4134 Proc1 0 18 0 | 19 20 21 | 0 0 4135 0 0 0 | 22 23 0 | 24 0 4136 ------------------------------------- 4137 Proc2 25 26 27 | 0 0 28 | 29 0 4138 30 0 0 | 31 32 33 | 0 34 4139 .ve 4140 4141 This can be represented as a collection of submatrices as: 4142 4143 .vb 4144 A B C 4145 D E F 4146 G H I 4147 .ve 4148 4149 Where the submatrices A,B,C are owned by proc0, D,E,F are 4150 owned by proc1, G,H,I are owned by proc2. 4151 4152 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4153 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4154 The 'M','N' parameters are 8,8, and have the same values on all procs. 4155 4156 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4157 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4158 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4159 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4160 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4161 matrix, ans [DF] as another SeqAIJ matrix. 4162 4163 When d_nz, o_nz parameters are specified, d_nz storage elements are 4164 allocated for every row of the local diagonal submatrix, and o_nz 4165 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4166 One way to choose d_nz and o_nz is to use the max nonzerors per local 4167 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4168 In this case, the values of d_nz,o_nz are: 4169 .vb 4170 proc0 : dnz = 2, o_nz = 2 4171 proc1 : dnz = 3, o_nz = 2 4172 proc2 : dnz = 1, o_nz = 4 4173 .ve 4174 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4175 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4176 for proc3. i.e we are using 12+15+10=37 storage locations to store 4177 34 values. 4178 4179 When d_nnz, o_nnz parameters are specified, the storage is specified 4180 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4181 In the above case the values for d_nnz,o_nnz are: 4182 .vb 4183 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4184 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4185 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4186 .ve 4187 Here the space allocated is sum of all the above values i.e 34, and 4188 hence pre-allocation is perfect. 
4189 4190 Level: intermediate 4191 4192 .keywords: matrix, aij, compressed row, sparse, parallel 4193 4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4195 MPIAIJ, MatCreateMPIAIJWithArrays() 4196 @*/ 4197 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4198 { 4199 PetscErrorCode ierr; 4200 PetscMPIInt size; 4201 4202 PetscFunctionBegin; 4203 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4204 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4205 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4206 if (size > 1) { 4207 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4208 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4209 } else { 4210 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4211 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4212 } 4213 PetscFunctionReturn(0); 4214 } 4215 4216 #undef __FUNCT__ 4217 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4218 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4219 { 4220 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4221 4222 PetscFunctionBegin; 4223 *Ad = a->A; 4224 *Ao = a->B; 4225 *colmap = a->garray; 4226 PetscFunctionReturn(0); 4227 } 4228 4229 #undef __FUNCT__ 4230 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4231 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4232 { 4233 PetscErrorCode ierr; 4234 PetscInt i; 4235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4236 4237 PetscFunctionBegin; 4238 if (coloring->ctype == IS_COLORING_GLOBAL) { 4239 ISColoringValue *allcolors,*colors; 4240 ISColoring ocoloring; 4241 4242 /* set coloring for diagonal portion */ 4243 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4244 4245 /* set coloring for off-diagonal portion */ 4246 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4247 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4248 for (i=0; i<a->B->cmap->n; i++) { 4249 colors[i] = allcolors[a->garray[i]]; 4250 } 4251 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4252 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4253 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4254 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4255 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4256 ISColoringValue *colors; 4257 PetscInt *larray; 4258 ISColoring ocoloring; 4259 4260 /* set coloring for diagonal portion */ 4261 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4262 for (i=0; i<a->A->cmap->n; i++) { 4263 larray[i] = i + A->cmap->rstart; 4264 } 4265 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4266 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4267 for (i=0; i<a->A->cmap->n; i++) { 4268 colors[i] = coloring->colors[larray[i]]; 4269 } 4270 ierr = PetscFree(larray);CHKERRQ(ierr); 4271 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4272 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4273 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4274 4275 /* set coloring for off-diagonal portion */ 4276 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4277 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4278 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->B->cmap->n; i++) { 4280 colors[i] = coloring->colors[larray[i]]; 4281 } 4282 ierr = PetscFree(larray);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4287 4288 PetscFunctionReturn(0); 4289 } 4290 4291 #if defined(PETSC_HAVE_ADIC) 4292 #undef __FUNCT__ 4293 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4294 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4295 { 4296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4297 PetscErrorCode ierr; 4298 4299 PetscFunctionBegin; 4300 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4301 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4302 PetscFunctionReturn(0); 4303 } 4304 #endif 4305 4306 #undef __FUNCT__ 4307 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4308 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4309 { 4310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4311 PetscErrorCode ierr; 4312 4313 PetscFunctionBegin; 4314 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4315 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4316 PetscFunctionReturn(0); 4317 } 4318 4319 #undef __FUNCT__ 4320 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4321 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4322 { 4323 PetscErrorCode ierr; 4324 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4325 PetscInt *indx; 4326 4327 PetscFunctionBegin; 4328 /* This routine will ONLY return MPIAIJ type matrix */ 4329 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4330 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4331 if (n == PETSC_DECIDE){ 4332 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4333 } 4334 /* Check sum(n) = N */ 4335 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4336 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4337 4338 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4339 rstart -= m; 4340 4341 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4342 for (i=0;i<m;i++) { 4343 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4344 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4345 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4346 } 4347 4348 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4349 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4350 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4351 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4352 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4353 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4354 PetscFunctionReturn(0); 4355 } 4356 4357 #undef __FUNCT__ 4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4359 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4360 { 4361 PetscErrorCode ierr; 4362 PetscInt 
m,N,i,rstart,nnz,Ii; 4363 PetscInt *indx; 4364 PetscScalar *values; 4365 4366 PetscFunctionBegin; 4367 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4368 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4369 for (i=0;i<m;i++) { 4370 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4371 Ii = i + rstart; 4372 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4373 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4374 } 4375 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4376 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 #undef __FUNCT__ 4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4382 /*@ 4383 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4384 matrices from each processor 4385 4386 Collective on MPI_Comm 4387 4388 Input Parameters: 4389 + comm - the communicators the parallel matrix will live on 4390 . inmat - the input sequential matrices 4391 . n - number of local columns (or PETSC_DECIDE) 4392 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4393 4394 Output Parameter: 4395 . outmat - the parallel matrix generated 4396 4397 Level: advanced 4398 4399 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4400 4401 @*/ 4402 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4403 { 4404 PetscErrorCode ierr; 4405 4406 PetscFunctionBegin; 4407 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4408 if (scall == MAT_INITIAL_MATRIX){ 4409 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4410 } 4411 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4412 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatFileSplit" 4418 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4419 { 4420 PetscErrorCode ierr; 4421 PetscMPIInt rank; 4422 PetscInt m,N,i,rstart,nnz; 4423 size_t len; 4424 const PetscInt *indx; 4425 PetscViewer out; 4426 char *name; 4427 Mat B; 4428 const PetscScalar *values; 4429 4430 PetscFunctionBegin; 4431 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4432 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4433 /* Should this be the type of the diagonal block of A? 
*/ 4434 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4435 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4436 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4437 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4438 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4439 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4440 for (i=0;i<m;i++) { 4441 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4442 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4443 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4444 } 4445 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4446 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4447 4448 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4449 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4450 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4451 sprintf(name,"%s.%d",outfile,rank); 4452 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4453 ierr = PetscFree(name); 4454 ierr = MatView(B,out);CHKERRQ(ierr); 4455 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4456 ierr = MatDestroy(&B);CHKERRQ(ierr); 4457 PetscFunctionReturn(0); 4458 } 4459 4460 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4461 #undef __FUNCT__ 4462 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4463 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4464 { 4465 PetscErrorCode ierr; 4466 Mat_Merge_SeqsToMPI *merge; 4467 PetscContainer container; 4468 4469 PetscFunctionBegin; 4470 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4471 if (container) { 4472 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4485 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4486 ierr = PetscFree(merge);CHKERRQ(ierr); 4487 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4488 } 4489 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #include <../src/mat/utils/freespace.h> 4494 #include <petscbt.h> 4495 4496 #undef __FUNCT__ 4497 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4498 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4499 { 4500 PetscErrorCode ierr; 4501 MPI_Comm comm=((PetscObject)mpimat)->comm; 4502 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4503 PetscMPIInt size,rank,taga,*len_s; 4504 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4505 PetscInt proc,m; 4506 PetscInt **buf_ri,**buf_rj; 4507 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4508 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4509 MPI_Request *s_waits,*r_waits; 4510 MPI_Status *status; 4511 MatScalar *aa=a->a; 4512 MatScalar **abuf_r,*ba_i; 4513 Mat_Merge_SeqsToMPI *merge; 4514 PetscContainer container; 4515 4516 PetscFunctionBegin; 4517 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4518 4519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4520 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4521 4522 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4523 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4524 4525 bi = merge->bi; 4526 bj = merge->bj; 4527 buf_ri = merge->buf_ri; 4528 buf_rj = merge->buf_rj; 4529 4530 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4531 owners = merge->rowmap->range; 4532 len_s = merge->len_s; 4533 4534 /* send and recv matrix values */ 4535 /*-----------------------------*/ 4536 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4537 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4538 4539 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4540 for (proc=0,k=0; proc<size; proc++){ 4541 if (!len_s[proc]) continue; 4542 i = owners[proc]; 4543 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4544 k++; 4545 } 4546 4547 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4548 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4549 ierr = PetscFree(status);CHKERRQ(ierr); 4550 4551 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4552 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4553 4554 /* insert mat values of mpimat */ 4555 /*----------------------------*/ 4556 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4557 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4558 4559 for (k=0; k<merge->nrecv; k++){ 4560 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4561 nrows = *(buf_ri_k[k]); 4562 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4563 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4564 } 4565 4566 /* set values of ba */ 4567 m = merge->rowmap->n; 4568 for (i=0; i<m; i++) { 4569 arow = owners[rank] + i; 4570 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4571 bnzi = bi[i+1] - bi[i]; 4572 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4573 4574 /* add local non-zero vals of this proc's seqmat into ba */ 4575 anzi = ai[arow+1] - ai[arow]; 4576 aj = a->j + ai[arow]; 4577 aa = a->a + ai[arow]; 4578 nextaj = 0; 4579 for (j=0; nextaj<anzi; j++){ 4580 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4581 ba_i[j] += aa[nextaj++]; 4582 } 4583 } 4584 4585 /* add received vals into ba */ 4586 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4587 /* i-th row */ 4588 if (i == *nextrow[k]) { 4589 anzi = *(nextai[k]+1) - *nextai[k]; 4590 aj = buf_rj[k] + *(nextai[k]); 4591 aa = abuf_r[k] + *(nextai[k]); 4592 nextaj = 0; 4593 for (j=0; nextaj<anzi; j++){ 4594 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4595 ba_i[j] += aa[nextaj++]; 4596 } 4597 } 4598 nextrow[k]++; nextai[k]++; 4599 } 4600 } 4601 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4602 } 4603 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4604 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4605 4606 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4607 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4608 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4609 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4610 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4611 PetscFunctionReturn(0); 4612 } 4613 4614 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4615 4616 #undef __FUNCT__ 4617 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4618 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4619 { 4620 PetscErrorCode ierr; 4621 Mat B_mpi; 4622 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4623 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4624 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4625 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4626 PetscInt len,proc,*dnz,*onz,bs,cbs; 4627 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4628 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4629 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4630 MPI_Status *status; 4631 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4632 PetscBT lnkbt; 4633 Mat_Merge_SeqsToMPI *merge; 4634 PetscContainer container; 4635 4636 PetscFunctionBegin; 4637 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4638 4639 /* make sure it is a PETSc comm */ 4640 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4641 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4642 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4643 4644 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4645 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4646 4647 /* determine row ownership */ 4648 /*---------------------------------------------------------*/ 4649 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4650 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4651 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4652 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4654 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4655 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4656 4657 m = merge->rowmap->n; 4658 M = merge->rowmap->N; 4659 owners = merge->rowmap->range; 4660 4661 /* determine the number of messages to send, their lengths */ 4662 /*---------------------------------------------------------*/ 4663 len_s = merge->len_s; 4664 4665 len = 0; /* length of buf_si[] */ 4666 merge->nsend = 0; 4667 for (proc=0; proc<size; proc++){ 4668 len_si[proc] = 0; 4669 if (proc == rank){ 4670 len_s[proc] = 0; 4671 } else { 4672 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4673 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4674 } 4675 if (len_s[proc]) { 4676 merge->nsend++; 4677 nrows = 0; 4678 for (i=owners[proc]; i<owners[proc+1]; i++){ 4679 if (ai[i+1] > ai[i]) nrows++; 4680 } 4681 len_si[proc] = 2*(nrows+1); 4682 len += len_si[proc]; 4683 } 4684 } 4685 4686 /* determine the number and length of messages to receive for ij-structure */ 4687 /*-------------------------------------------------------------------------*/ 4688 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4689 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4690 4691 /* post the Irecv of j-structure */ 4692 /*-------------------------------*/ 4693 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4694 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4695 4696 /* post the Isend of j-structure */ 4697 /*--------------------------------*/ 4698 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4699 4700 for (proc=0, k=0; proc<size; proc++){ 4701 if (!len_s[proc]) continue; 4702 i = owners[proc]; 4703 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4704 k++; 4705 } 4706 4707 /* receives and sends of j-structure are complete */ 4708 /*------------------------------------------------*/ 4709 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4710 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4711 4712 /* send and recv i-structure */ 4713 /*---------------------------*/ 4714 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4715 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4716 4717 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4718 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4719 for (proc=0,k=0; proc<size; proc++){ 4720 if (!len_s[proc]) continue; 4721 /* form outgoing message for i-structure: 4722 buf_si[0]: nrows to be sent 4723 [1:nrows]: row index (global) 4724 [nrows+1:2*nrows+1]: i-structure index 4725 */ 4726 /*-------------------------------------------*/ 4727 nrows = len_si[proc]/2 - 1; 4728 buf_si_i = buf_si + nrows+1; 4729 buf_si[0] = nrows; 4730 buf_si_i[0] = 0; 4731 nrows = 0; 4732 for (i=owners[proc]; i<owners[proc+1]; i++){ 4733 anzi = ai[i+1] - ai[i]; 4734 if (anzi) { 4735 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4736 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4737 nrows++; 4738 } 4739 } 4740 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4741 k++; 4742 buf_si += len_si[proc]; 4743 } 4744 4745 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4746 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4747 4748 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4749 for (i=0; i<merge->nrecv; i++){ 4750 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4751 } 4752 4753 ierr = PetscFree(len_si);CHKERRQ(ierr); 4754 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4755 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4756 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4757 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4758 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4759 ierr = PetscFree(status);CHKERRQ(ierr); 4760 4761 /* compute a local seq matrix in each processor */ 4762 /*----------------------------------------------*/ 4763 /* allocate bi array and free space for accumulating nonzero column info */ 4764 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4765 bi[0] = 0; 4766 4767 /* create and initialize a linked list */ 4768 nlnk = N+1; 4769 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4770 4771 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4772 len = 0; 4773 len = ai[owners[rank+1]] - ai[owners[rank]]; 4774 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4775 current_space = free_space; 4776 4777 /* determine 
symbolic info for each local row */ 4778 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4779 4780 for (k=0; k<merge->nrecv; k++){ 4781 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4782 nrows = *buf_ri_k[k]; 4783 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4784 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4785 } 4786 4787 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4788 len = 0; 4789 for (i=0;i<m;i++) { 4790 bnzi = 0; 4791 /* add local non-zero cols of this proc's seqmat into lnk */ 4792 arow = owners[rank] + i; 4793 anzi = ai[arow+1] - ai[arow]; 4794 aj = a->j + ai[arow]; 4795 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4796 bnzi += nlnk; 4797 /* add received col data into lnk */ 4798 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4799 if (i == *nextrow[k]) { /* i-th row */ 4800 anzi = *(nextai[k]+1) - *nextai[k]; 4801 aj = buf_rj[k] + *nextai[k]; 4802 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4803 bnzi += nlnk; 4804 nextrow[k]++; nextai[k]++; 4805 } 4806 } 4807 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4808 4809 /* if free space is not available, make more free space */ 4810 if (current_space->local_remaining<bnzi) { 4811 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4812 nspacedouble++; 4813 } 4814 /* copy data into free space, then initialize lnk */ 4815 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4816 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4817 4818 current_space->array += bnzi; 4819 current_space->local_used += bnzi; 4820 current_space->local_remaining -= bnzi; 4821 4822 bi[i+1] = bi[i] + bnzi; 4823 } 4824 4825 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4826 4827 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4828 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4829 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4830 4831 /* create symbolic parallel matrix B_mpi */ 4832 /*---------------------------------------*/ 4833 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4834 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4835 if (n==PETSC_DECIDE) { 4836 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4837 } else { 4838 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4839 } 4840 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4841 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4842 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4843 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4844 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4845 4846 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4847 B_mpi->assembled = PETSC_FALSE; 4848 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4849 merge->bi = bi; 4850 merge->bj = bj; 4851 merge->buf_ri = buf_ri; 4852 merge->buf_rj = buf_rj; 4853 merge->coi = PETSC_NULL; 4854 merge->coj = PETSC_NULL; 4855 merge->owners_co = PETSC_NULL; 4856 4857 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4858 4859 /* attach the supporting struct to B_mpi for reuse */ 4860 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4861 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4862 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4863 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4864 *mpimat = B_mpi; 4865 4866 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4867 PetscFunctionReturn(0); 4868 } 4869 4870 #undef __FUNCT__ 4871 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4872 /*@C 4873 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4874 matrices from each processor 4875 4876 Collective on MPI_Comm 4877 4878 Input Parameters: 4879 + comm - the communicators the parallel matrix will live on 4880 . seqmat - the input sequential matrices 4881 . m - number of local rows (or PETSC_DECIDE) 4882 . n - number of local columns (or PETSC_DECIDE) 4883 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4884 4885 Output Parameter: 4886 . mpimat - the parallel matrix generated 4887 4888 Level: advanced 4889 4890 Notes: 4891 The dimensions of the sequential matrix in each processor MUST be the same. 4892 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4893 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4894 @*/ 4895 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4896 { 4897 PetscErrorCode ierr; 4898 PetscMPIInt size; 4899 4900 PetscFunctionBegin; 4901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4902 if (size == 1){ 4903 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4904 if (scall == MAT_INITIAL_MATRIX){ 4905 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4906 } else { 4907 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4908 } 4909 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4913 if (scall == MAT_INITIAL_MATRIX){ 4914 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4915 } 4916 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4917 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4918 PetscFunctionReturn(0); 4919 } 4920 4921 #undef __FUNCT__ 4922 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4923 /*@ 4924 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4925 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4926 with MatGetSize() 4927 4928 Not Collective 4929 4930 Input Parameters: 4931 + A - the matrix 4932 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4933 4934 Output Parameter: 4935 . 
A_loc - the local sequential matrix generated 4936 4937 Level: developer 4938 4939 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4940 4941 @*/ 4942 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4943 { 4944 PetscErrorCode ierr; 4945 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4946 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4947 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4948 MatScalar *aa=a->a,*ba=b->a,*cam; 4949 PetscScalar *ca; 4950 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4951 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4952 PetscBool match; 4953 4954 PetscFunctionBegin; 4955 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4956 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4957 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4958 if (scall == MAT_INITIAL_MATRIX){ 4959 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4960 ci[0] = 0; 4961 for (i=0; i<am; i++){ 4962 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4963 } 4964 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4965 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4966 k = 0; 4967 for (i=0; i<am; i++) { 4968 ncols_o = bi[i+1] - bi[i]; 4969 ncols_d = ai[i+1] - ai[i]; 4970 /* off-diagonal portion of A */ 4971 for (jo=0; jo<ncols_o; jo++) { 4972 col = cmap[*bj]; 4973 if (col >= cstart) break; 4974 cj[k] = col; bj++; 4975 ca[k++] = *ba++; 4976 } 4977 /* diagonal portion of A */ 4978 for (j=0; j<ncols_d; j++) { 4979 cj[k] = cstart + *aj++; 4980 ca[k++] = *aa++; 4981 } 4982 /* off-diagonal portion of A */ 4983 for (j=jo; j<ncols_o; j++) { 4984 cj[k] = cmap[*bj++]; 4985 ca[k++] = *ba++; 4986 } 4987 } 4988 /* put together the new matrix */ 4989 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4990 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4991 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4992 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4993 mat->free_a = PETSC_TRUE; 4994 mat->free_ij = PETSC_TRUE; 4995 mat->nonew = 0; 4996 } else if (scall == MAT_REUSE_MATRIX){ 4997 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4998 ci = mat->i; cj = mat->j; cam = mat->a; 4999 for (i=0; i<am; i++) { 5000 /* off-diagonal portion of A */ 5001 ncols_o = bi[i+1] - bi[i]; 5002 for (jo=0; jo<ncols_o; jo++) { 5003 col = cmap[*bj]; 5004 if (col >= cstart) break; 5005 *cam++ = *ba++; bj++; 5006 } 5007 /* diagonal portion of A */ 5008 ncols_d = ai[i+1] - ai[i]; 5009 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5010 /* off-diagonal portion of A */ 5011 for (j=jo; j<ncols_o; j++) { 5012 *cam++ = *ba++; bj++; 5013 } 5014 } 5015 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5016 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5017 PetscFunctionReturn(0); 5018 } 5019 5020 #undef __FUNCT__ 5021 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5022 /*@C 5023 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5024 5025 Not Collective 5026 5027 Input Parameters: 5028 + A - the matrix 5029 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5030 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5031 5032 Output Parameter: 5033 . 
A_loc - the local sequential matrix generated 5034 5035 Level: developer 5036 5037 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5038 5039 @*/ 5040 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5041 { 5042 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5043 PetscErrorCode ierr; 5044 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5045 IS isrowa,iscola; 5046 Mat *aloc; 5047 PetscBool match; 5048 5049 PetscFunctionBegin; 5050 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5051 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5052 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5053 if (!row){ 5054 start = A->rmap->rstart; end = A->rmap->rend; 5055 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5056 } else { 5057 isrowa = *row; 5058 } 5059 if (!col){ 5060 start = A->cmap->rstart; 5061 cmap = a->garray; 5062 nzA = a->A->cmap->n; 5063 nzB = a->B->cmap->n; 5064 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5065 ncols = 0; 5066 for (i=0; i<nzB; i++) { 5067 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5068 else break; 5069 } 5070 imark = i; 5071 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5072 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5073 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5074 } else { 5075 iscola = *col; 5076 } 5077 if (scall != MAT_INITIAL_MATRIX){ 5078 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5079 aloc[0] = *A_loc; 5080 } 5081 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5082 *A_loc = aloc[0]; 5083 ierr = PetscFree(aloc);CHKERRQ(ierr); 5084 if (!row){ 5085 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5086 } 5087 if (!col){ 5088 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5089 } 5090 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "MatGetBrowsOfAcols" 5096 /*@C 5097 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5098 5099 Collective on Mat 5100 5101 Input Parameters: 5102 + A,B - the matrices in mpiaij format 5103 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5105 5106 Output Parameter: 5107 + rowb, colb - index sets of rows and columns of B to extract 5108 - B_seq - the sequential matrix generated 5109 5110 Level: developer 5111 5112 @*/ 5113 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5114 { 5115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5116 PetscErrorCode ierr; 5117 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5118 IS isrowb,iscolb; 5119 Mat *bseq=PETSC_NULL; 5120 5121 PetscFunctionBegin; 5122 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5123 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5124 } 5125 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5126 5127 if (scall == MAT_INITIAL_MATRIX){ 5128 start = A->cmap->rstart; 5129 cmap = a->garray; 5130 nzA = a->A->cmap->n; 5131 nzB = a->B->cmap->n; 5132 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5133 ncols = 0; 5134 for (i=0; i<nzB; i++) { /* row < local row index */ 5135 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5136 else break; 5137 } 5138 imark = i; 5139 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5140 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5141 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5142 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5143 } else { 5144 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5145 isrowb = *rowb; iscolb = *colb; 5146 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5147 bseq[0] = *B_seq; 5148 } 5149 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5150 *B_seq = bseq[0]; 5151 ierr = PetscFree(bseq);CHKERRQ(ierr); 5152 if (!rowb){ 5153 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5154 } else { 5155 *rowb = isrowb; 5156 } 5157 if (!colb){ 5158 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5159 } else { 5160 *colb = iscolb; 5161 } 5162 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5163 PetscFunctionReturn(0); 5164 } 5165 5166 #undef __FUNCT__ 5167 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5168 /* 5169 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5170 of the OFF-DIAGONAL portion of local A 5171 5172 Collective on Mat 5173 5174 Input Parameters: 5175 + A,B - the matrices in mpiaij format 5176 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5177 5178 Output Parameter: 5179 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5180 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5181 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5182 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5183 5184 Level: developer 5185 5186 */ 5187 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5188 { 5189 VecScatter_MPI_General *gen_to,*gen_from; 5190 PetscErrorCode ierr; 5191 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5192 Mat_SeqAIJ *b_oth; 5193 VecScatter ctx=a->Mvctx; 5194 MPI_Comm comm=((PetscObject)ctx)->comm; 5195 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5196 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5197 PetscScalar *rvalues,*svalues; 5198 MatScalar *b_otha,*bufa,*bufA; 5199 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5200 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5201 MPI_Status *sstatus,rstatus; 5202 PetscMPIInt jj; 5203 PetscInt *cols,sbs,rbs; 5204 PetscScalar *vals; 5205 5206 PetscFunctionBegin; 5207 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5209 } 5210 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5211 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5212 5213 gen_to = (VecScatter_MPI_General*)ctx->todata; 5214 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5215 rvalues = gen_from->values; /* holds the length of receiving row */ 5216 svalues = gen_to->values; /* holds the length of sending row */ 5217 nrecvs = gen_from->n; 5218 nsends = gen_to->n; 5219 5220 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5221 srow = gen_to->indices; /* local row index to be sent */ 5222 sstarts = gen_to->starts; 5223 sprocs = gen_to->procs; 5224 sstatus = gen_to->sstatus; 5225 sbs = gen_to->bs; 5226 rstarts = gen_from->starts; 5227 rprocs = gen_from->procs; 5228 rbs = gen_from->bs; 5229 5230 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5231 if (scall == MAT_INITIAL_MATRIX){ 5232 /* i-array */ 5233 /*---------*/ 5234 /* post receives */ 5235 for (i=0; i<nrecvs; i++){ 5236 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5237 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5238 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5239 } 5240 5241 /* pack the outgoing message */ 5242 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5243 sstartsj[0] = 0; rstartsj[0] = 0; 5244 len = 0; /* total length of j or a array to be sent */ 5245 k = 0; 5246 for (i=0; i<nsends; i++){ 5247 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5248 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5249 for (j=0; j<nrows; j++) { 5250 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5251 for (l=0; l<sbs; l++){ 5252 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5253 rowlen[j*sbs+l] = ncols; 5254 len += ncols; 5255 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5256 } 5257 k++; 5258 } 5259 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5260 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5261 } 5262 /* 
recvs and sends of i-array are completed */ 5263 i = nrecvs; 5264 while (i--) { 5265 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5266 } 5267 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5268 5269 /* allocate buffers for sending j and a arrays */ 5270 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5271 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5272 5273 /* create i-array of B_oth */ 5274 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5275 b_othi[0] = 0; 5276 len = 0; /* total length of j or a array to be received */ 5277 k = 0; 5278 for (i=0; i<nrecvs; i++){ 5279 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5280 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5281 for (j=0; j<nrows; j++) { 5282 b_othi[k+1] = b_othi[k] + rowlen[j]; 5283 len += rowlen[j]; k++; 5284 } 5285 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5286 } 5287 5288 /* allocate space for j and a arrrays of B_oth */ 5289 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5290 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5291 5292 /* j-array */ 5293 /*---------*/ 5294 /* post receives of j-array */ 5295 for (i=0; i<nrecvs; i++){ 5296 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5297 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5298 } 5299 5300 /* pack the outgoing message j-array */ 5301 k = 0; 5302 for (i=0; i<nsends; i++){ 5303 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5304 bufJ = bufj+sstartsj[i]; 5305 for (j=0; j<nrows; j++) { 5306 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5307 for (ll=0; ll<sbs; ll++){ 5308 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5309 for (l=0; l<ncols; l++){ 5310 *bufJ++ = cols[l]; 5311 } 5312 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5313 } 5314 } 5315 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* recvs and sends of j-array are completed */ 5319 i = nrecvs; 5320 while (i--) { 5321 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5322 } 5323 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5324 } else if (scall == MAT_REUSE_MATRIX){ 5325 sstartsj = *startsj_s; 5326 rstartsj = *startsj_r; 5327 bufa = *bufa_ptr; 5328 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5329 b_otha = b_oth->a; 5330 } else { 5331 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5332 } 5333 5334 /* a-array */ 5335 /*---------*/ 5336 /* post receives of a-array */ 5337 for (i=0; i<nrecvs; i++){ 5338 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5339 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5340 } 5341 5342 /* pack the outgoing message a-array */ 5343 k = 0; 5344 for (i=0; i<nsends; i++){ 5345 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5346 bufA = bufa+sstartsj[i]; 5347 for (j=0; j<nrows; j++) { 5348 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5349 for (ll=0; ll<sbs; ll++){ 5350 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5351 for (l=0; l<ncols; l++){ 5352 *bufA++ = vals[l]; 5353 } 5354 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5355 } 5356 } 5357 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5358 } 5359 /* recvs and sends of a-array are completed */ 5360 i = nrecvs; 5361 while (i--) { 5362 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5363 } 5364 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5365 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5366 5367 if (scall == MAT_INITIAL_MATRIX){ 5368 /* put together the new matrix */ 5369 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5370 5371 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5372 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5373 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5374 b_oth->free_a = PETSC_TRUE; 5375 b_oth->free_ij = PETSC_TRUE; 5376 b_oth->nonew = 0; 5377 5378 ierr = PetscFree(bufj);CHKERRQ(ierr); 5379 if (!startsj_s || !bufa_ptr){ 5380 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5381 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5382 } else { 5383 *startsj_s = sstartsj; 5384 *startsj_r = rstartsj; 5385 *bufa_ptr = bufa; 5386 } 5387 } 5388 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5389 PetscFunctionReturn(0); 5390 } 5391 5392 #undef __FUNCT__ 5393 #define __FUNCT__ "MatGetCommunicationStructs" 5394 /*@C 5395 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5396 5397 Not Collective 5398 5399 Input Parameters: 5400 . A - The matrix in mpiaij format 5401 5402 Output Parameter: 5403 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5404 . 
colmap - A map from global column index to local index into lvec 5405 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5406 5407 Level: developer 5408 5409 @*/ 5410 #if defined (PETSC_USE_CTABLE) 5411 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5412 #else 5413 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5414 #endif 5415 { 5416 Mat_MPIAIJ *a; 5417 5418 PetscFunctionBegin; 5419 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5420 PetscValidPointer(lvec, 2); 5421 PetscValidPointer(colmap, 3); 5422 PetscValidPointer(multScatter, 4); 5423 a = (Mat_MPIAIJ *) A->data; 5424 if (lvec) *lvec = a->lvec; 5425 if (colmap) *colmap = a->colmap; 5426 if (multScatter) *multScatter = a->Mvctx; 5427 PetscFunctionReturn(0); 5428 } 5429 5430 EXTERN_C_BEGIN 5431 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5432 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5433 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5434 EXTERN_C_END 5435 5436 #undef __FUNCT__ 5437 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5438 /* 5439 Computes (B'*A')' since computing B*A directly is untenable 5440 5441 n p p 5442 ( ) ( ) ( ) 5443 m ( A ) * n ( B ) = m ( C ) 5444 ( ) ( ) ( ) 5445 5446 */ 5447 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5448 { 5449 PetscErrorCode ierr; 5450 Mat At,Bt,Ct; 5451 5452 PetscFunctionBegin; 5453 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5454 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5455 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5456 ierr = MatDestroy(&At);CHKERRQ(ierr); 5457 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5458 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5459 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5460 PetscFunctionReturn(0); 5461 } 5462 5463 #undef __FUNCT__ 5464 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5465 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5466 { 5467 PetscErrorCode ierr; 5468 PetscInt m=A->rmap->n,n=B->cmap->n; 5469 Mat Cmat; 5470 5471 PetscFunctionBegin; 5472 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5473 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5474 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5475 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5476 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5477 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5478 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5479 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5480 *C = Cmat; 5481 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5482 PetscFunctionReturn(0); 5483 } 5484 5485 /* ----------------------------------------------------------------*/ 5486 #undef __FUNCT__ 5487 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5488 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5489 { 5490 PetscErrorCode ierr; 5491 5492 PetscFunctionBegin; 5493 if (scall == MAT_INITIAL_MATRIX){ 5494 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5495 } 5496 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5497 PetscFunctionReturn(0); 5498 } 5499 5500 EXTERN_C_BEGIN 5501 #if defined(PETSC_HAVE_MUMPS) 5502 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5503 #endif 5504 #if defined(PETSC_HAVE_PASTIX) 5505 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5506 #endif 5507 #if defined(PETSC_HAVE_SUPERLU_DIST) 5508 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5509 #endif 5510 #if defined(PETSC_HAVE_SPOOLES) 5511 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5512 #endif 5513 #if defined(PETSC_HAVE_CLIQUE) 5514 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5515 #endif 5516 EXTERN_C_END 5517 5518 /*MC 5519 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5520 5521 Options Database Keys: 5522 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5523 5524 Level: beginner 5525 5526 .seealso: MatCreateAIJ() 5527 M*/ 5528 5529 EXTERN_C_BEGIN 5530 #undef __FUNCT__ 5531 #define __FUNCT__ "MatCreate_MPIAIJ" 5532 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5533 { 5534 Mat_MPIAIJ *b; 5535 PetscErrorCode ierr; 5536 PetscMPIInt size; 5537 5538 PetscFunctionBegin; 5539 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5540 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5541 B->data = (void*)b; 5542 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5543 B->assembled = PETSC_FALSE; 5544 B->insertmode = NOT_SET_VALUES; 5545 b->size = size; 5546 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5547 5548 /* build cache for off array entries formed */ 5549 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5550 b->donotstash = PETSC_FALSE; 5551 b->colmap = 0; 5552 b->garray = 0; 5553 b->roworiented = PETSC_TRUE; 5554 5555 /* stuff used for matrix vector multiply */ 5556 b->lvec = PETSC_NULL; 5557 b->Mvctx = PETSC_NULL; 5558 5559 /* stuff for MatGetRow() */ 5560 b->rowindices = 0; 5561 b->rowvalues = 0; 5562 b->getrowactive = PETSC_FALSE; 5563 5564 /* flexible pointer used in CUSP/CUSPARSE classes */ 5565 b->spptr = PETSC_NULL; 5566 #if defined(PETSC_HAVE_SPOOLES) 5567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5568 "MatGetFactor_mpiaij_spooles", 5569 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5570 #endif 5571 #if defined(PETSC_HAVE_MUMPS) 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5573 "MatGetFactor_aij_mumps", 5574 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5575 #endif 5576 #if defined(PETSC_HAVE_PASTIX) 5577 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5578 "MatGetFactor_mpiaij_pastix", 5579 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5580 #endif 5581 #if defined(PETSC_HAVE_SUPERLU_DIST) 5582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5583 "MatGetFactor_mpiaij_superlu_dist", 5584 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5585 #endif 5586 #if defined(PETSC_HAVE_CLIQUE) 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5588 "MatGetFactor_aij_clique", 5589 MatGetFactor_aij_clique);CHKERRQ(ierr); 5590 #endif 5591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5592 "MatStoreValues_MPIAIJ", 5593 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5594 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5595 "MatRetrieveValues_MPIAIJ", 5596 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5598 "MatGetDiagonalBlock_MPIAIJ", 5599 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5601 "MatIsTranspose_MPIAIJ", 5602 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5604 "MatMPIAIJSetPreallocation_MPIAIJ", 5605 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5607 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5608 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5610 "MatDiagonalScaleLocal_MPIAIJ", 5611 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5613 "MatConvert_MPIAIJ_MPIAIJPERM", 5614 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5616 "MatConvert_MPIAIJ_MPIAIJCRL", 5617 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5619 "MatConvert_MPIAIJ_MPISBAIJ", 5620 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5622 "MatMatMult_MPIDense_MPIAIJ", 5623 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5625 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5626 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5628 "MatMatMultNumeric_MPIDense_MPIAIJ", 5629 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5630 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5631 PetscFunctionReturn(0); 5632 } 5633 EXTERN_C_END 5634 5635 #undef __FUNCT__ 5636 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5637 /*@ 5638 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5639 and "off-diagonal" part of the matrix in CSR format. 5640 5641 Collective on MPI_Comm 5642 5643 Input Parameters: 5644 + comm - MPI communicator 5645 . m - number of local rows (Cannot be PETSC_DECIDE) 5646 . n - This value should be the same as the local size used in creating the 5647 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5648 calculated if N is given) For square matrices n is almost always m. 5649 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5650 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5651 . i - row indices for "diagonal" portion of matrix 5652 . j - column indices 5653 . a - matrix values 5654 . oi - row indices for "off-diagonal" portion of matrix 5655 . oj - column indices 5656 - oa - matrix values 5657 5658 Output Parameter: 5659 . mat - the matrix 5660 5661 Level: advanced 5662 5663 Notes: 5664 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
The user 5665 must free the arrays once the matrix has been destroyed and not before. 5666 5667 The i and j indices are 0 based 5668 5669 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5670 5671 This sets local rows and cannot be used to set off-processor values. 5672 5673 You cannot later use MatSetValues() to change values in this matrix. 5674 5675 .keywords: matrix, aij, compressed row, sparse, parallel 5676 5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5678 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5679 @*/ 5680 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5681 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5682 { 5683 PetscErrorCode ierr; 5684 Mat_MPIAIJ *maij; 5685 5686 PetscFunctionBegin; 5687 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5688 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5689 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5690 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5691 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5692 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5693 maij = (Mat_MPIAIJ*) (*mat)->data; 5694 maij->donotstash = PETSC_TRUE; 5695 (*mat)->preallocated = PETSC_TRUE; 5696 5697 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5698 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5699 5700 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5701 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5702 5703 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5707 5708 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5709 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5710 PetscFunctionReturn(0); 5711 } 5712 5713 /* 5714 Special version for direct calls from Fortran 5715 */ 5716 #include <petsc-private/fortranimpl.h> 5717 5718 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5722 #endif 5723 5724 /* Change these macros so can be used in void function */ 5725 #undef CHKERRQ 5726 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5727 #undef SETERRQ2 5728 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5729 #undef SETERRQ3 5730 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5731 #undef SETERRQ 5732 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5733 5734 EXTERN_C_BEGIN 5735 #undef __FUNCT__ 5736 #define __FUNCT__ "matsetvaluesmpiaij_" 5737 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5738 { 5739 Mat mat = *mmat; 5740 PetscInt m = *mm, n = *mn; 5741 InsertMode addv = *maddv; 5742 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5743 PetscScalar value; 5744 PetscErrorCode ierr; 5745 5746 
MatCheckPreallocated(mat,1); 5747 if (mat->insertmode == NOT_SET_VALUES) { 5748 mat->insertmode = addv; 5749 } 5750 #if defined(PETSC_USE_DEBUG) 5751 else if (mat->insertmode != addv) { 5752 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5753 } 5754 #endif 5755 { 5756 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5757 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5758 PetscBool roworiented = aij->roworiented; 5759 5760 /* Some Variables required in the macro */ 5761 Mat A = aij->A; 5762 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5763 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5764 MatScalar *aa = a->a; 5765 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5766 Mat B = aij->B; 5767 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5768 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5769 MatScalar *ba = b->a; 5770 5771 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5772 PetscInt nonew = a->nonew; 5773 MatScalar *ap1,*ap2; 5774 5775 PetscFunctionBegin; 5776 for (i=0; i<m; i++) { 5777 if (im[i] < 0) continue; 5778 #if defined(PETSC_USE_DEBUG) 5779 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5780 #endif 5781 if (im[i] >= rstart && im[i] < rend) { 5782 row = im[i] - rstart; 5783 lastcol1 = -1; 5784 rp1 = aj + ai[row]; 5785 ap1 = aa + ai[row]; 5786 rmax1 = aimax[row]; 5787 nrow1 = ailen[row]; 5788 low1 = 0; 5789 high1 = nrow1; 5790 lastcol2 = -1; 5791 rp2 = bj + bi[row]; 5792 ap2 = ba + bi[row]; 5793 rmax2 = bimax[row]; 5794 nrow2 = bilen[row]; 5795 low2 = 0; 5796 high2 = nrow2; 5797 5798 for (j=0; j<n; j++) { 5799 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5800 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5801 if (in[j] >= cstart && in[j] < cend){ 5802 col = in[j] - cstart; 5803 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5804 } else if (in[j] < 0) continue; 5805 #if defined(PETSC_USE_DEBUG) 5806 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5807 #endif 5808 else { 5809 if (mat->was_assembled) { 5810 if (!aij->colmap) { 5811 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5812 } 5813 #if defined (PETSC_USE_CTABLE) 5814 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5815 col--; 5816 #else 5817 col = aij->colmap[in[j]] - 1; 5818 #endif 5819 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5820 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5821 col = in[j]; 5822 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5823 B = aij->B; 5824 b = (Mat_SeqAIJ*)B->data; 5825 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5826 rp2 = bj + bi[row]; 5827 ap2 = ba + bi[row]; 5828 rmax2 = bimax[row]; 5829 nrow2 = bilen[row]; 5830 low2 = 0; 5831 high2 = nrow2; 5832 bm = aij->B->rmap->n; 5833 ba = b->a; 5834 } 5835 } else col = in[j]; 5836 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5837 } 5838 } 5839 } else { 5840 if (!aij->donotstash) { 5841 if (roworiented) { 5842 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5843 } else { 5844 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5845 } 5846 } 5847 } 5848 }} 5849 PetscFunctionReturnVoid(); 5850 } 5851 EXTERN_C_END 5852 5853