1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.179 1999/06/30 23:51:46 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "sys.h" 10 #include "src/mat/impls/baij/seq/baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 14 #define CHUNKSIZE 10 15 16 /* 17 Checks for missing diagonals 18 */ 19 #undef __FUNC__ 20 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 21 int MatMissingDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int *diag = a->diag, *jj = a->j,i; 25 26 PetscFunctionBegin; 27 for ( i=0; i<a->mbs; i++ ) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 37 int MatMarkDiag_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 40 int i,j, *diag, m = a->mbs; 41 42 PetscFunctionBegin; 43 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 44 PLogObjectMemory(A,(m+1)*sizeof(int)); 45 for ( i=0; i<m; i++ ) { 46 diag[i] = a->i[i+1]; 47 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 48 if (a->j[j] == i) { 49 diag[i] = j; 50 break; 51 } 52 } 53 } 54 a->diag = diag; 55 PetscFunctionReturn(0); 56 } 57 58 59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 60 61 #undef __FUNC__ 62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 64 PetscTruth *done) 65 { 66 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 67 int ierr, n = a->mbs,i; 68 69 PetscFunctionBegin; 70 *nn = n; 71 if (!ia) PetscFunctionReturn(0); 72 if (symmetric) { 73 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 74 } else if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 int nz = a->i[n] + 1; 77 for ( i=0; i<nz; i++ ) a->j[i]++; 78 for ( i=0; i<n+1; i++ ) a->i[i]++; 79 *ia = a->i; *ja = a->j; 80 } else { 81 *ia = a->i; *ja = a->j; 82 } 83 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 90 PetscTruth *done) 91 { 92 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 93 int i,n = a->mbs,ierr; 94 95 PetscFunctionBegin; 96 if (!ia) PetscFunctionReturn(0); 97 if (symmetric) { 98 ierr = PetscFree(*ia);CHKERRQ(ierr); 99 ierr = PetscFree(*ja);CHKERRQ(ierr); 100 } else if (oshift == 1) { 101 int nz = a->i[n]; 102 for ( i=0; i<nz; i++ ) a->j[i]--; 103 for ( i=0; i<n+1; i++ ) a->i[i]--; 104 } 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNC__ 109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 111 { 112 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 113 114 PetscFunctionBegin; 115 *bs = baij->bs; 116 PetscFunctionReturn(0); 117 } 118 119 120 #undef __FUNC__ 121 #define __FUNC__ "MatDestroy_SeqBAIJ" 122 int MatDestroy_SeqBAIJ(Mat A) 123 { 124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 125 int ierr; 126 127 if (A->mapping) { 128 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 129 } 130 if (A->bmapping) { 131 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 132 } 133 if (A->rmap) { 134 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 135 } 136 if (A->cmap) { 137 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 138 } 139 #if defined(PETSC_USE_LOG) 140 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 141 #endif 142 ierr = PetscFree(a->a);CHKERRQ(ierr); 143 if (!a->singlemalloc) { 144 ierr = PetscFree(a->i);CHKERRQ(ierr); 145 ierr = PetscFree(a->j);CHKERRQ(ierr); 146 } 147 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 148 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 149 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 150 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 151 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 152 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 153 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 154 ierr = PetscFree(a);CHKERRQ(ierr); 155 PLogObjectDestroy(A); 156 PetscHeaderDestroy(A); 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNC__ 161 #define __FUNC__ "MatSetOption_SeqBAIJ" 162 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 163 { 164 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 165 166 PetscFunctionBegin; 167 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 168 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 169 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 170 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 171 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 172 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 173 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 174 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 175 else if (op == MAT_ROWS_SORTED || 176 op == MAT_ROWS_UNSORTED || 177 op == MAT_SYMMETRIC || 178 op == MAT_STRUCTURALLY_SYMMETRIC || 179 op == MAT_YES_NEW_DIAGONALS || 180 op == MAT_IGNORE_OFF_PROC_ENTRIES || 181 op == MAT_USE_HASH_TABLE) { 182 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 183 } else if (op == MAT_NO_NEW_DIAGONALS) { 184 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 185 } else { 186 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 192 #undef __FUNC__ 193 #define __FUNC__ "MatGetSize_SeqBAIJ" 194 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 195 { 196 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 197 198 PetscFunctionBegin; 199 if (m) *m = a->m; 200 if (n) *n = a->n; 201 PetscFunctionReturn(0); 202 } 203 204 #undef __FUNC__ 205 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 206 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 207 { 208 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 209 210 PetscFunctionBegin; 211 *m = 0; *n = a->m; 212 PetscFunctionReturn(0); 213 } 214 215 #undef __FUNC__ 216 #define __FUNC__ "MatGetRow_SeqBAIJ" 217 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 220 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 221 MatScalar *aa,*aa_i; 222 Scalar *v_i; 223 224 PetscFunctionBegin; 225 bs = a->bs; 226 ai = a->i; 227 aj = a->j; 228 aa = a->a; 229 bs2 = a->bs2; 230 231 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 232 233 bn = row/bs; /* Block number */ 234 bp = row % bs; /* Block Position */ 235 M = ai[bn+1] - ai[bn]; 236 *nz = bs*M; 237 238 if (v) { 239 *v = 0; 240 if (*nz) { 241 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 242 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 243 v_i = *v + i*bs; 244 aa_i = aa + bs2*(ai[bn] + i); 245 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 246 } 247 } 248 } 249 250 if (idx) { 251 *idx = 0; 252 if (*nz) { 253 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 254 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 255 idx_i = *idx + i*bs; 256 itmp = bs*aj[ai[bn] + i]; 257 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 258 } 259 } 260 } 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNC__ 265 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 266 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatTranspose_SeqBAIJ" 278 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 279 { 280 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 281 Mat C; 282 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 283 int *rows,*cols,bs2=a->bs2; 284 MatScalar *array = a->a; 285 286 PetscFunctionBegin; 287 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 288 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 289 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 290 291 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 292 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 293 ierr = PetscFree(col);CHKERRQ(ierr); 294 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 295 cols = rows + bs; 296 for ( i=0; i<mbs; i++ ) { 297 cols[0] = i*bs; 298 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 299 len = ai[i+1] - ai[i]; 300 for ( j=0; j<len; j++ ) { 301 rows[0] = (*aj++)*bs; 302 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 303 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 304 array += bs2; 305 } 306 } 307 ierr = PetscFree(rows);CHKERRQ(ierr); 308 309 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 311 312 if (B != PETSC_NULL) { 313 *B = C; 314 } else { 315 PetscOps *Abops; 316 MatOps Aops; 317 318 /* This isn't really an in-place transpose */ 319 ierr = PetscFree(a->a);CHKERRQ(ierr); 320 if (!a->singlemalloc) { 321 ierr = PetscFree(a->i);CHKERRQ(ierr); 322 ierr = PetscFree(a->j);CHKERRQ(ierr); 323 } 324 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 325 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 326 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 327 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 328 ierr = PetscFree(a);CHKERRQ(ierr); 329 330 331 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 332 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 333 334 /* 335 This is horrible, horrible code. We need to keep the 336 A pointers for the bops and ops but copy everything 337 else from C. 338 */ 339 Abops = A->bops; 340 Aops = A->ops; 341 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 342 A->bops = Abops; 343 A->ops = Aops; 344 A->qlist = 0; 345 346 PetscHeaderDestroy(C); 347 } 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatView_SeqBAIJ_Binary" 353 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 354 { 355 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 356 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 357 Scalar *aa; 358 FILE *file; 359 360 PetscFunctionBegin; 361 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 362 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 363 col_lens[0] = MAT_COOKIE; 364 365 col_lens[1] = a->m; 366 col_lens[2] = a->n; 367 col_lens[3] = a->nz*bs2; 368 369 /* store lengths of each row and write (including header) to file */ 370 count = 0; 371 for ( i=0; i<a->mbs; i++ ) { 372 for ( j=0; j<bs; j++ ) { 373 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 374 } 375 } 376 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 377 ierr = PetscFree(col_lens);CHKERRQ(ierr); 378 379 /* store column indices (zero start index) */ 380 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 381 count = 0; 382 for ( i=0; i<a->mbs; i++ ) { 383 for ( j=0; j<bs; j++ ) { 384 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 385 for ( l=0; l<bs; l++ ) { 386 jj[count++] = bs*a->j[k] + l; 387 } 388 } 389 } 390 } 391 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 392 ierr = PetscFree(jj);CHKERRQ(ierr); 393 394 /* store nonzero values */ 395 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 396 count = 0; 397 for ( i=0; i<a->mbs; i++ ) { 398 for ( j=0; j<bs; j++ ) { 399 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 400 for ( l=0; l<bs; l++ ) { 401 aa[count++] = a->a[bs2*k + l*bs + j]; 402 } 403 } 404 } 405 } 406 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 407 ierr = PetscFree(aa);CHKERRQ(ierr); 408 409 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 410 if (file) { 411 fprintf(file,"-matload_block_size %d\n",a->bs); 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNC__ 417 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 418 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 419 { 420 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 421 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 422 FILE *fd; 423 char *outputname; 424 425 PetscFunctionBegin; 426 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 427 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 428 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 429 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 430 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 431 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 432 SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported"); 433 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 434 for ( i=0; i<a->mbs; i++ ) { 435 for ( j=0; j<bs; j++ ) { 436 fprintf(fd,"row %d:",i*bs+j); 437 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 438 for ( l=0; l<bs; l++ ) { 439 #if defined(PETSC_USE_COMPLEX) 440 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 441 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 442 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 443 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 444 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 445 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 446 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 447 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 448 } 449 #else 450 if (a->a[bs2*k + l*bs + j] != 0.0) { 451 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 452 } 453 #endif 454 } 455 } 456 fprintf(fd,"\n"); 457 } 458 } 459 } else { 460 for ( i=0; i<a->mbs; i++ ) { 461 for ( j=0; j<bs; j++ ) { 462 fprintf(fd,"row %d:",i*bs+j); 463 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 464 for ( l=0; l<bs; l++ ) { 465 #if defined(PETSC_USE_COMPLEX) 466 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 467 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 468 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 469 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 470 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 471 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 472 } else { 473 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 474 } 475 #else 476 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 477 #endif 478 } 479 } 480 fprintf(fd,"\n"); 481 } 482 } 483 } 484 fflush(fd); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 490 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 491 { 492 Mat A = (Mat) Aa; 493 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 494 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 495 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 496 MatScalar *aa; 497 MPI_Comm comm; 498 Viewer viewer; 499 500 PetscFunctionBegin; 501 /* 502 This is nasty. If this is called from an originally parallel matrix 503 then all processes call this, but only the first has the matrix so the 504 rest should return immediately. 505 */ 506 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 507 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 508 if (rank) PetscFunctionReturn(0); 509 510 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 511 512 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 513 514 /* loop over matrix elements drawing boxes */ 515 color = DRAW_BLUE; 516 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 517 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 518 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 519 x_l = a->j[j]*bs; x_r = x_l + 1.0; 520 aa = a->a + j*bs2; 521 for ( k=0; k<bs; k++ ) { 522 for ( l=0; l<bs; l++ ) { 523 if (PetscReal(*aa++) >= 0.) continue; 524 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 525 } 526 } 527 } 528 } 529 color = DRAW_CYAN; 530 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 531 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 532 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 533 x_l = a->j[j]*bs; x_r = x_l + 1.0; 534 aa = a->a + j*bs2; 535 for ( k=0; k<bs; k++ ) { 536 for ( l=0; l<bs; l++ ) { 537 if (PetscReal(*aa++) != 0.) continue; 538 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 539 } 540 } 541 } 542 } 543 544 color = DRAW_RED; 545 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 546 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 547 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 548 x_l = a->j[j]*bs; x_r = x_l + 1.0; 549 aa = a->a + j*bs2; 550 for ( k=0; k<bs; k++ ) { 551 for ( l=0; l<bs; l++ ) { 552 if (PetscReal(*aa++) <= 0.) continue; 553 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 554 } 555 } 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNC__ 562 #define __FUNC__ "MatView_SeqBAIJ_Draw" 563 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 564 { 565 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 566 int ierr; 567 double xl,yl,xr,yr,w,h; 568 Draw draw; 569 PetscTruth isnull; 570 571 PetscFunctionBegin; 572 573 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 574 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 575 576 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 577 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 578 xr += w; yr += h; xl = -w; yl = -h; 579 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 580 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 581 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNC__ 586 #define __FUNC__ "MatView_SeqBAIJ" 587 int MatView_SeqBAIJ(Mat A,Viewer viewer) 588 { 589 ViewerType vtype; 590 int ierr; 591 592 PetscFunctionBegin; 593 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 594 if (PetscTypeCompare(vtype,SOCKET_VIEWER)) { 595 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 596 } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){ 597 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 598 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 599 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 600 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 601 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 602 } else { 603 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 604 } 605 PetscFunctionReturn(0); 606 } 607 608 609 #undef __FUNC__ 610 #define __FUNC__ "MatGetValues_SeqBAIJ" 611 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 612 { 613 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 614 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 615 int *ai = a->i, *ailen = a->ilen; 616 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 617 MatScalar *ap, *aa = a->a, zero = 0.0; 618 619 PetscFunctionBegin; 620 for ( k=0; k<m; k++ ) { /* loop over rows */ 621 row = im[k]; brow = row/bs; 622 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 623 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 624 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 625 nrow = ailen[brow]; 626 for ( l=0; l<n; l++ ) { /* loop over columns */ 627 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 628 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 629 col = in[l] ; 630 bcol = col/bs; 631 cidx = col%bs; 632 ridx = row%bs; 633 high = nrow; 634 low = 0; /* assume unsorted */ 635 while (high-low > 5) { 636 t = (low+high)/2; 637 if (rp[t] > bcol) high = t; 638 else low = t; 639 } 640 for ( i=low; i<high; i++ ) { 641 if (rp[i] > bcol) break; 642 if (rp[i] == bcol) { 643 *v++ = ap[bs2*i+bs*cidx+ridx]; 644 goto finished; 645 } 646 } 647 *v++ = zero; 648 finished:; 649 } 650 } 651 PetscFunctionReturn(0); 652 } 653 654 655 #undef __FUNC__ 656 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 657 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 658 { 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 660 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 661 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 662 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 663 Scalar *value = v; 664 MatScalar *ap,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 if (roworiented) { 668 stepval = (n-1)*bs; 669 } else { 670 stepval = (m-1)*bs; 671 } 672 for ( k=0; k<m; k++ ) { /* loop over added rows */ 673 row = im[k]; 674 if (row < 0) continue; 675 #if defined(PETSC_USE_BOPT_g) 676 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 677 #endif 678 rp = aj + ai[row]; 679 ap = aa + bs2*ai[row]; 680 rmax = imax[row]; 681 nrow = ailen[row]; 682 low = 0; 683 for ( l=0; l<n; l++ ) { /* loop over added columns */ 684 if (in[l] < 0) continue; 685 #if defined(PETSC_USE_BOPT_g) 686 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 687 #endif 688 col = in[l]; 689 if (roworiented) { 690 value = v + k*(stepval+bs)*bs + l*bs; 691 } else { 692 value = v + l*(stepval+bs)*bs + k*bs; 693 } 694 if (!sorted) low = 0; high = nrow; 695 while (high-low > 7) { 696 t = (low+high)/2; 697 if (rp[t] > col) high = t; 698 else low = t; 699 } 700 for ( i=low; i<high; i++ ) { 701 if (rp[i] > col) break; 702 if (rp[i] == col) { 703 bap = ap + bs2*i; 704 if (roworiented) { 705 if (is == ADD_VALUES) { 706 for ( ii=0; ii<bs; ii++,value+=stepval ) { 707 for (jj=ii; jj<bs2; jj+=bs ) { 708 bap[jj] += *value++; 709 } 710 } 711 } else { 712 for ( ii=0; ii<bs; ii++,value+=stepval ) { 713 for (jj=ii; jj<bs2; jj+=bs ) { 714 bap[jj] = *value++; 715 } 716 } 717 } 718 } else { 719 if (is == ADD_VALUES) { 720 for ( ii=0; ii<bs; ii++,value+=stepval ) { 721 for (jj=0; jj<bs; jj++ ) { 722 *bap++ += *value++; 723 } 724 } 725 } else { 726 for ( ii=0; ii<bs; ii++,value+=stepval ) { 727 for (jj=0; jj<bs; jj++ ) { 728 *bap++ = *value++; 729 } 730 } 731 } 732 } 733 goto noinsert2; 734 } 735 } 736 if (nonew == 1) goto noinsert2; 737 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 738 if (nrow >= rmax) { 739 /* there is no extra room in row, therefore enlarge */ 740 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 741 MatScalar *new_a; 742 743 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 744 745 /* malloc new storage space */ 746 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 747 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 748 new_j = (int *) (new_a + bs2*new_nz); 749 new_i = new_j + new_nz; 750 751 /* copy over old data into new slots */ 752 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 753 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 754 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 755 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 756 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 757 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 758 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 759 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 760 /* free up old matrix storage */ 761 ierr = PetscFree(a->a);CHKERRQ(ierr); 762 if (!a->singlemalloc) { 763 ierr = PetscFree(a->i);CHKERRQ(ierr); 764 ierr = PetscFree(a->j);CHKERRQ(ierr); 765 } 766 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 767 a->singlemalloc = 1; 768 769 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 770 rmax = imax[row] = imax[row] + CHUNKSIZE; 771 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 772 a->maxnz += bs2*CHUNKSIZE; 773 a->reallocs++; 774 a->nz++; 775 } 776 N = nrow++ - 1; 777 /* shift up all the later entries in this row */ 778 for ( ii=N; ii>=i; ii-- ) { 779 rp[ii+1] = rp[ii]; 780 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 781 } 782 if (N >= i) { 783 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 784 } 785 rp[i] = col; 786 bap = ap + bs2*i; 787 if (roworiented) { 788 for ( ii=0; ii<bs; ii++,value+=stepval) { 789 for (jj=ii; jj<bs2; jj+=bs ) { 790 bap[jj] = *value++; 791 } 792 } 793 } else { 794 for ( ii=0; ii<bs; ii++,value+=stepval ) { 795 for (jj=0; jj<bs; jj++ ) { 796 *bap++ = *value++; 797 } 798 } 799 } 800 noinsert2:; 801 low = i; 802 } 803 ailen[row] = nrow; 804 } 805 PetscFunctionReturn(0); 806 } 807 808 809 #undef __FUNC__ 810 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 811 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 812 { 813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 814 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 815 int m = a->m,*ip, N, *ailen = a->ilen; 816 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 817 MatScalar *aa = a->a, *ap; 818 819 PetscFunctionBegin; 820 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 821 822 if (m) rmax = ailen[0]; 823 for ( i=1; i<mbs; i++ ) { 824 /* move each row back by the amount of empty slots (fshift) before it*/ 825 fshift += imax[i-1] - ailen[i-1]; 826 rmax = PetscMax(rmax,ailen[i]); 827 if (fshift) { 828 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 829 N = ailen[i]; 830 for ( j=0; j<N; j++ ) { 831 ip[j-fshift] = ip[j]; 832 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 833 } 834 } 835 ai[i] = ai[i-1] + ailen[i-1]; 836 } 837 if (mbs) { 838 fshift += imax[mbs-1] - ailen[mbs-1]; 839 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 840 } 841 /* reset ilen and imax for each row */ 842 for ( i=0; i<mbs; i++ ) { 843 ailen[i] = imax[i] = ai[i+1] - ai[i]; 844 } 845 a->nz = ai[mbs]; 846 847 /* diagonals may have moved, so kill the diagonal pointers */ 848 if (fshift && a->diag) { 849 ierr = PetscFree(a->diag);CHKERRQ(ierr); 850 PLogObjectMemory(A,-(m+1)*sizeof(int)); 851 a->diag = 0; 852 } 853 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 854 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 855 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 856 a->reallocs); 857 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 858 a->reallocs = 0; 859 A->info.nz_unneeded = (double)fshift*bs2; 860 861 PetscFunctionReturn(0); 862 } 863 864 865 866 /* 867 This function returns an array of flags which indicate the locations of contiguous 868 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 869 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 870 Assume: sizes should be long enough to hold all the values. 871 */ 872 #undef __FUNC__ 873 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 874 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 875 { 876 int i,j,k,row; 877 PetscTruth flg; 878 879 /* PetscFunctionBegin;*/ 880 for ( i=0,j=0; i<n; j++ ) { 881 row = idx[i]; 882 if (row%bs!=0) { /* Not the begining of a block */ 883 sizes[j] = 1; 884 i++; 885 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 886 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 887 i++; 888 } else { /* Begining of the block, so check if the complete block exists */ 889 flg = PETSC_TRUE; 890 for ( k=1; k<bs; k++ ) { 891 if (row+k != idx[i+k]) { /* break in the block */ 892 flg = PETSC_FALSE; 893 break; 894 } 895 } 896 if (flg == PETSC_TRUE) { /* No break in the bs */ 897 sizes[j] = bs; 898 i+= bs; 899 } else { 900 sizes[j] = 1; 901 i++; 902 } 903 } 904 } 905 *bs_max = j; 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNC__ 910 #define __FUNC__ "MatZeroRows_SeqBAIJ" 911 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 912 { 913 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 914 int ierr,i,j,k,count,is_n,*is_idx,*rows; 915 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 916 Scalar zero = 0.0; 917 MatScalar *aa; 918 919 PetscFunctionBegin; 920 /* Make a copy of the IS and sort it */ 921 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 922 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 923 924 /* allocate memory for rows,sizes */ 925 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 926 sizes = rows + is_n; 927 928 /* initialize copy IS valurs to rows, and sort them */ 929 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 930 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 931 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 932 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 933 934 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 935 row = rows[j]; 936 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 937 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 938 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 939 if (sizes[i] == bs) { 940 if (diag) { 941 if (baij->ilen[row/bs] > 0) { 942 baij->ilen[row/bs] = 1; 943 baij->j[baij->i[row/bs]] = row/bs; 944 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 945 } 946 /* Now insert all the diagoanl values for this bs */ 947 for ( k=0; k<bs; k++ ) { 948 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 949 } 950 } else { /* (!diag) */ 951 baij->ilen[row/bs] = 0; 952 } /* end (!diag) */ 953 } else { /* (sizes[i] != bs) */ 954 #if defined (PETSC_USE_DEBUG) 955 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 956 #endif 957 for ( k=0; k<count; k++ ) { 958 aa[0] = zero; 959 aa+=bs; 960 } 961 if (diag) { 962 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 963 } 964 } 965 } 966 967 ierr = PetscFree(rows);CHKERRQ(ierr); 968 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNC__ 973 #define __FUNC__ "MatSetValues_SeqBAIJ" 974 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 975 { 976 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 977 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 978 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 979 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 980 int ridx,cidx,bs2=a->bs2,ierr; 981 MatScalar *ap,value,*aa=a->a,*bap; 982 983 PetscFunctionBegin; 984 for ( k=0; k<m; k++ ) { /* loop over added rows */ 985 row = im[k]; brow = row/bs; 986 if (row < 0) continue; 987 #if defined(PETSC_USE_BOPT_g) 988 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 989 #endif 990 rp = aj + ai[brow]; 991 ap = aa + bs2*ai[brow]; 992 rmax = imax[brow]; 993 nrow = ailen[brow]; 994 low = 0; 995 for ( l=0; l<n; l++ ) { /* loop over added columns */ 996 if (in[l] < 0) continue; 997 #if defined(PETSC_USE_BOPT_g) 998 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 999 #endif 1000 col = in[l]; bcol = col/bs; 1001 ridx = row % bs; cidx = col % bs; 1002 if (roworiented) { 1003 value = v[l + k*n]; 1004 } else { 1005 value = v[k + l*m]; 1006 } 1007 if (!sorted) low = 0; high = nrow; 1008 while (high-low > 7) { 1009 t = (low+high)/2; 1010 if (rp[t] > bcol) high = t; 1011 else low = t; 1012 } 1013 for ( i=low; i<high; i++ ) { 1014 if (rp[i] > bcol) break; 1015 if (rp[i] == bcol) { 1016 bap = ap + bs2*i + bs*cidx + ridx; 1017 if (is == ADD_VALUES) *bap += value; 1018 else *bap = value; 1019 goto noinsert1; 1020 } 1021 } 1022 if (nonew == 1) goto noinsert1; 1023 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1024 if (nrow >= rmax) { 1025 /* there is no extra room in row, therefore enlarge */ 1026 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1027 MatScalar *new_a; 1028 1029 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1030 1031 /* Malloc new storage space */ 1032 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1033 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1034 new_j = (int *) (new_a + bs2*new_nz); 1035 new_i = new_j + new_nz; 1036 1037 /* copy over old data into new slots */ 1038 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1039 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1040 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1041 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1042 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1043 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1044 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1045 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1046 /* free up old matrix storage */ 1047 ierr = PetscFree(a->a);CHKERRQ(ierr); 1048 if (!a->singlemalloc) { 1049 ierr = PetscFree(a->i);CHKERRQ(ierr); 1050 ierr = PetscFree(a->j);CHKERRQ(ierr); 1051 } 1052 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1053 a->singlemalloc = 1; 1054 1055 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1056 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1057 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1058 a->maxnz += bs2*CHUNKSIZE; 1059 a->reallocs++; 1060 a->nz++; 1061 } 1062 N = nrow++ - 1; 1063 /* shift up all the later entries in this row */ 1064 for ( ii=N; ii>=i; ii-- ) { 1065 rp[ii+1] = rp[ii]; 1066 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1067 } 1068 if (N>=i) { 1069 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1070 } 1071 rp[i] = bcol; 1072 ap[bs2*i + bs*cidx + ridx] = value; 1073 noinsert1:; 1074 low = i; 1075 } 1076 ailen[brow] = nrow; 1077 } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1082 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1083 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1084 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1085 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1086 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1087 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1088 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1089 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1090 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1091 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1092 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1093 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1094 extern int MatZeroEntries_SeqBAIJ(Mat); 1095 1096 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1097 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1098 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1099 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1100 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1101 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1102 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1103 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1104 1105 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1106 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1107 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1108 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1109 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1110 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1111 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1112 1113 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1114 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1115 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1116 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1117 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1118 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1119 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1120 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1121 1122 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1123 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1124 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1125 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1126 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1127 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1128 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1129 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1130 1131 #undef __FUNC__ 1132 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1133 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1136 Mat outA; 1137 int ierr; 1138 PetscTruth row_identity, col_identity; 1139 1140 PetscFunctionBegin; 1141 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1142 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1143 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1144 if (!row_identity || !col_identity) { 1145 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1146 } 1147 1148 outA = inA; 1149 inA->factor = FACTOR_LU; 1150 a->row = row; 1151 a->col = col; 1152 1153 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1154 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1155 PLogObjectParent(inA,a->icol); 1156 1157 if (!a->solve_work) { 1158 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1159 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1160 } 1161 1162 if (!a->diag) { 1163 ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr); 1164 } 1165 /* 1166 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1167 for ILU(0) factorization with natural ordering 1168 */ 1169 switch (a->bs) { 1170 case 2: 1171 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1172 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1173 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1174 break; 1175 case 3: 1176 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1177 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1178 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1179 break; 1180 case 4: 1181 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1182 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1183 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1184 break; 1185 case 5: 1186 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1187 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1188 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1189 break; 1190 case 6: 1191 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1192 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1193 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1194 break; 1195 case 7: 1196 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1197 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1198 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1199 break; 1200 } 1201 1202 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1203 1204 PetscFunctionReturn(0); 1205 } 1206 #undef __FUNC__ 1207 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1208 int MatPrintHelp_SeqBAIJ(Mat A) 1209 { 1210 static int called = 0; 1211 MPI_Comm comm = A->comm; 1212 int ierr; 1213 1214 PetscFunctionBegin; 1215 if (called) {PetscFunctionReturn(0);} else called = 1; 1216 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1217 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 EXTERN_C_BEGIN 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1224 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1225 { 1226 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1227 int i,nz,n; 1228 1229 PetscFunctionBegin; 1230 nz = baij->maxnz; 1231 n = baij->n; 1232 for (i=0; i<nz; i++) { 1233 baij->j[i] = indices[i]; 1234 } 1235 baij->nz = nz; 1236 for ( i=0; i<n; i++ ) { 1237 baij->ilen[i] = baij->imax[i]; 1238 } 1239 1240 PetscFunctionReturn(0); 1241 } 1242 EXTERN_C_END 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1246 /*@ 1247 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1248 in the matrix. 1249 1250 Input Parameters: 1251 + mat - the SeqBAIJ matrix 1252 - indices - the column indices 1253 1254 Level: advanced 1255 1256 Notes: 1257 This can be called if you have precomputed the nonzero structure of the 1258 matrix and want to provide it to the matrix object to improve the performance 1259 of the MatSetValues() operation. 1260 1261 You MUST have set the correct numbers of nonzeros per row in the call to 1262 MatCreateSeqBAIJ(). 1263 1264 MUST be called before any calls to MatSetValues(); 1265 1266 @*/ 1267 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1268 { 1269 int ierr,(*f)(Mat,int *); 1270 1271 PetscFunctionBegin; 1272 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1273 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1274 if (f) { 1275 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1276 } else { 1277 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* -------------------------------------------------------------------*/ 1283 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1284 MatGetRow_SeqBAIJ, 1285 MatRestoreRow_SeqBAIJ, 1286 MatMult_SeqBAIJ_N, 1287 MatMultAdd_SeqBAIJ_N, 1288 MatMultTrans_SeqBAIJ, 1289 MatMultTransAdd_SeqBAIJ, 1290 MatSolve_SeqBAIJ_N, 1291 0, 1292 0, 1293 0, 1294 MatLUFactor_SeqBAIJ, 1295 0, 1296 0, 1297 MatTranspose_SeqBAIJ, 1298 MatGetInfo_SeqBAIJ, 1299 MatEqual_SeqBAIJ, 1300 MatGetDiagonal_SeqBAIJ, 1301 MatDiagonalScale_SeqBAIJ, 1302 MatNorm_SeqBAIJ, 1303 0, 1304 MatAssemblyEnd_SeqBAIJ, 1305 0, 1306 MatSetOption_SeqBAIJ, 1307 MatZeroEntries_SeqBAIJ, 1308 MatZeroRows_SeqBAIJ, 1309 MatLUFactorSymbolic_SeqBAIJ, 1310 MatLUFactorNumeric_SeqBAIJ_N, 1311 0, 1312 0, 1313 MatGetSize_SeqBAIJ, 1314 MatGetSize_SeqBAIJ, 1315 MatGetOwnershipRange_SeqBAIJ, 1316 MatILUFactorSymbolic_SeqBAIJ, 1317 0, 1318 0, 1319 0, 1320 MatDuplicate_SeqBAIJ, 1321 0, 1322 0, 1323 MatILUFactor_SeqBAIJ, 1324 0, 1325 0, 1326 MatGetSubMatrices_SeqBAIJ, 1327 MatIncreaseOverlap_SeqBAIJ, 1328 MatGetValues_SeqBAIJ, 1329 0, 1330 MatPrintHelp_SeqBAIJ, 1331 MatScale_SeqBAIJ, 1332 0, 1333 0, 1334 0, 1335 MatGetBlockSize_SeqBAIJ, 1336 MatGetRowIJ_SeqBAIJ, 1337 MatRestoreRowIJ_SeqBAIJ, 1338 0, 1339 0, 1340 0, 1341 0, 1342 0, 1343 0, 1344 MatSetValuesBlocked_SeqBAIJ, 1345 MatGetSubMatrix_SeqBAIJ, 1346 0, 1347 0, 1348 MatGetMaps_Petsc}; 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1353 int MatStoreValues_SeqBAIJ(Mat mat) 1354 { 1355 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1356 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1357 int ierr; 1358 1359 PetscFunctionBegin; 1360 if (aij->nonew != 1) { 1361 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1362 } 1363 1364 /* allocate space for values if not already there */ 1365 if (!aij->saved_values) { 1366 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1367 } 1368 1369 /* copy values over */ 1370 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 EXTERN_C_END 1374 1375 EXTERN_C_BEGIN 1376 #undef __FUNC__ 1377 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1378 int MatRetrieveValues_SeqBAIJ(Mat mat) 1379 { 1380 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1381 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1382 1383 PetscFunctionBegin; 1384 if (aij->nonew != 1) { 1385 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1386 } 1387 if (!aij->saved_values) { 1388 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1389 } 1390 1391 /* copy values over */ 1392 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 EXTERN_C_END 1396 1397 #undef __FUNC__ 1398 #define __FUNC__ "MatCreateSeqBAIJ" 1399 /*@C 1400 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1401 compressed row) format. For good matrix assembly performance the 1402 user should preallocate the matrix storage by setting the parameter nz 1403 (or the array nnz). By setting these parameters accurately, performance 1404 during matrix assembly can be increased by more than a factor of 50. 1405 1406 Collective on MPI_Comm 1407 1408 Input Parameters: 1409 + comm - MPI communicator, set to PETSC_COMM_SELF 1410 . bs - size of block 1411 . m - number of rows 1412 . n - number of columns 1413 . nz - number of block nonzeros per block row (same for all rows) 1414 - nnz - array containing the number of block nonzeros in the various block rows 1415 (possibly different for each block row) or PETSC_NULL 1416 1417 Output Parameter: 1418 . A - the matrix 1419 1420 Options Database Keys: 1421 . -mat_no_unroll - uses code that does not unroll the loops in the 1422 block calculations (much slower) 1423 . -mat_block_size - size of the blocks to use 1424 1425 Level: intermediate 1426 1427 Notes: 1428 The block AIJ format is fully compatible with standard Fortran 77 1429 storage. That is, the stored row and column indices can begin at 1430 either one (as in Fortran) or zero. See the users' manual for details. 1431 1432 Specify the preallocated storage with either nz or nnz (not both). 1433 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1434 allocation. For additional details, see the users manual chapter on 1435 matrices. 1436 1437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1438 @*/ 1439 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1440 { 1441 Mat B; 1442 Mat_SeqBAIJ *b; 1443 int i,len,ierr,flg,mbs,nbs,bs2,size; 1444 1445 PetscFunctionBegin; 1446 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1447 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1448 1449 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1450 mbs = m/bs; 1451 nbs = n/bs; 1452 bs2 = bs*bs; 1453 1454 if (mbs*bs!=m || nbs*bs!=n) { 1455 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1456 } 1457 1458 *A = 0; 1459 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1460 PLogObjectCreate(B); 1461 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1462 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1463 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1464 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1465 if (!flg) { 1466 switch (bs) { 1467 case 1: 1468 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1469 B->ops->solve = MatSolve_SeqBAIJ_1; 1470 B->ops->mult = MatMult_SeqBAIJ_1; 1471 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1472 break; 1473 case 2: 1474 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1475 B->ops->solve = MatSolve_SeqBAIJ_2; 1476 B->ops->mult = MatMult_SeqBAIJ_2; 1477 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1478 break; 1479 case 3: 1480 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1481 B->ops->solve = MatSolve_SeqBAIJ_3; 1482 B->ops->mult = MatMult_SeqBAIJ_3; 1483 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1484 break; 1485 case 4: 1486 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1487 B->ops->solve = MatSolve_SeqBAIJ_4; 1488 B->ops->mult = MatMult_SeqBAIJ_4; 1489 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1490 break; 1491 case 5: 1492 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1493 B->ops->solve = MatSolve_SeqBAIJ_5; 1494 B->ops->mult = MatMult_SeqBAIJ_5; 1495 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1496 break; 1497 case 6: 1498 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1499 B->ops->solve = MatSolve_SeqBAIJ_6; 1500 B->ops->mult = MatMult_SeqBAIJ_6; 1501 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1502 break; 1503 case 7: 1504 B->ops->mult = MatMult_SeqBAIJ_7; 1505 B->ops->solve = MatSolve_SeqBAIJ_7; 1506 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1507 break; 1508 } 1509 } 1510 B->ops->destroy = MatDestroy_SeqBAIJ; 1511 B->ops->view = MatView_SeqBAIJ; 1512 B->factor = 0; 1513 B->lupivotthreshold = 1.0; 1514 B->mapping = 0; 1515 b->row = 0; 1516 b->col = 0; 1517 b->icol = 0; 1518 b->reallocs = 0; 1519 b->saved_values = 0; 1520 1521 b->m = m; B->m = m; B->M = m; 1522 b->n = n; B->n = n; B->N = n; 1523 1524 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1525 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1526 1527 b->mbs = mbs; 1528 b->nbs = nbs; 1529 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1530 if (nnz == PETSC_NULL) { 1531 if (nz == PETSC_DEFAULT) nz = 5; 1532 else if (nz <= 0) nz = 1; 1533 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1534 nz = nz*mbs; 1535 } else { 1536 nz = 0; 1537 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1538 } 1539 1540 /* allocate the matrix space */ 1541 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1542 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1543 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1544 b->j = (int *) (b->a + nz*bs2); 1545 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1546 b->i = b->j + nz; 1547 b->singlemalloc = 1; 1548 1549 b->i[0] = 0; 1550 for (i=1; i<mbs+1; i++) { 1551 b->i[i] = b->i[i-1] + b->imax[i-1]; 1552 } 1553 1554 /* b->ilen will count nonzeros in each block row so far. */ 1555 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1556 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1557 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1558 1559 b->bs = bs; 1560 b->bs2 = bs2; 1561 b->mbs = mbs; 1562 b->nz = 0; 1563 b->maxnz = nz*bs2; 1564 b->sorted = 0; 1565 b->roworiented = 1; 1566 b->nonew = 0; 1567 b->diag = 0; 1568 b->solve_work = 0; 1569 b->mult_work = 0; 1570 b->spptr = 0; 1571 B->info.nz_unneeded = (double)b->maxnz; 1572 1573 *A = B; 1574 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1575 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1576 1577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1578 "MatStoreValues_SeqBAIJ", 1579 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1581 "MatRetrieveValues_SeqBAIJ", 1582 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1584 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1585 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNC__ 1590 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1591 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1592 { 1593 Mat C; 1594 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1595 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1596 1597 PetscFunctionBegin; 1598 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1599 1600 *B = 0; 1601 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1602 PLogObjectCreate(C); 1603 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1604 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1605 C->ops->destroy = MatDestroy_SeqBAIJ; 1606 C->ops->view = MatView_SeqBAIJ; 1607 C->factor = A->factor; 1608 c->row = 0; 1609 c->col = 0; 1610 c->icol = 0; 1611 c->saved_values = 0; 1612 C->assembled = PETSC_TRUE; 1613 1614 c->m = C->m = a->m; 1615 c->n = C->n = a->n; 1616 C->M = a->m; 1617 C->N = a->n; 1618 1619 c->bs = a->bs; 1620 c->bs2 = a->bs2; 1621 c->mbs = a->mbs; 1622 c->nbs = a->nbs; 1623 1624 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1625 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1626 for ( i=0; i<mbs; i++ ) { 1627 c->imax[i] = a->imax[i]; 1628 c->ilen[i] = a->ilen[i]; 1629 } 1630 1631 /* allocate the matrix space */ 1632 c->singlemalloc = 1; 1633 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1634 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1635 c->j = (int *) (c->a + nz*bs2); 1636 c->i = c->j + nz; 1637 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1638 if (mbs > 0) { 1639 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1640 if (cpvalues == MAT_COPY_VALUES) { 1641 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1642 } else { 1643 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1644 } 1645 } 1646 1647 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1648 c->sorted = a->sorted; 1649 c->roworiented = a->roworiented; 1650 c->nonew = a->nonew; 1651 1652 if (a->diag) { 1653 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1654 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1655 for ( i=0; i<mbs; i++ ) { 1656 c->diag[i] = a->diag[i]; 1657 } 1658 } else c->diag = 0; 1659 c->nz = a->nz; 1660 c->maxnz = a->maxnz; 1661 c->solve_work = 0; 1662 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1663 c->mult_work = 0; 1664 *B = C; 1665 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNC__ 1670 #define __FUNC__ "MatLoad_SeqBAIJ" 1671 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1672 { 1673 Mat_SeqBAIJ *a; 1674 Mat B; 1675 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1676 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1677 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1678 int *masked, nmask,tmp,bs2,ishift; 1679 Scalar *aa; 1680 MPI_Comm comm = ((PetscObject) viewer)->comm; 1681 1682 PetscFunctionBegin; 1683 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1684 bs2 = bs*bs; 1685 1686 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1687 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1688 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1689 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1690 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1691 M = header[1]; N = header[2]; nz = header[3]; 1692 1693 if (header[3] < 0) { 1694 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1695 } 1696 1697 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1698 1699 /* 1700 This code adds extra rows to make sure the number of rows is 1701 divisible by the blocksize 1702 */ 1703 mbs = M/bs; 1704 extra_rows = bs - M + bs*(mbs); 1705 if (extra_rows == bs) extra_rows = 0; 1706 else mbs++; 1707 if (extra_rows) { 1708 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1709 } 1710 1711 /* read in row lengths */ 1712 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1713 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1714 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1715 1716 /* read in column indices */ 1717 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1718 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1719 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1720 1721 /* loop over row lengths determining block row lengths */ 1722 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1723 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1724 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1725 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1726 masked = mask + mbs; 1727 rowcount = 0; nzcount = 0; 1728 for ( i=0; i<mbs; i++ ) { 1729 nmask = 0; 1730 for ( j=0; j<bs; j++ ) { 1731 kmax = rowlengths[rowcount]; 1732 for ( k=0; k<kmax; k++ ) { 1733 tmp = jj[nzcount++]/bs; 1734 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1735 } 1736 rowcount++; 1737 } 1738 browlengths[i] += nmask; 1739 /* zero out the mask elements we set */ 1740 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1741 } 1742 1743 /* create our matrix */ 1744 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1745 B = *A; 1746 a = (Mat_SeqBAIJ *) B->data; 1747 1748 /* set matrix "i" values */ 1749 a->i[0] = 0; 1750 for ( i=1; i<= mbs; i++ ) { 1751 a->i[i] = a->i[i-1] + browlengths[i-1]; 1752 a->ilen[i-1] = browlengths[i-1]; 1753 } 1754 a->nz = 0; 1755 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1756 1757 /* read in nonzero values */ 1758 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1759 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1760 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1761 1762 /* set "a" and "j" values into matrix */ 1763 nzcount = 0; jcount = 0; 1764 for ( i=0; i<mbs; i++ ) { 1765 nzcountb = nzcount; 1766 nmask = 0; 1767 for ( j=0; j<bs; j++ ) { 1768 kmax = rowlengths[i*bs+j]; 1769 for ( k=0; k<kmax; k++ ) { 1770 tmp = jj[nzcount++]/bs; 1771 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1772 } 1773 rowcount++; 1774 } 1775 /* sort the masked values */ 1776 PetscSortInt(nmask,masked); 1777 1778 /* set "j" values into matrix */ 1779 maskcount = 1; 1780 for ( j=0; j<nmask; j++ ) { 1781 a->j[jcount++] = masked[j]; 1782 mask[masked[j]] = maskcount++; 1783 } 1784 /* set "a" values into matrix */ 1785 ishift = bs2*a->i[i]; 1786 for ( j=0; j<bs; j++ ) { 1787 kmax = rowlengths[i*bs+j]; 1788 for ( k=0; k<kmax; k++ ) { 1789 tmp = jj[nzcountb]/bs ; 1790 block = mask[tmp] - 1; 1791 point = jj[nzcountb] - bs*tmp; 1792 idx = ishift + bs2*block + j + bs*point; 1793 a->a[idx] = aa[nzcountb++]; 1794 } 1795 } 1796 /* zero out the mask elements we set */ 1797 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1798 } 1799 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1800 1801 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1802 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1803 ierr = PetscFree(aa);CHKERRQ(ierr); 1804 ierr = PetscFree(jj);CHKERRQ(ierr); 1805 ierr = PetscFree(mask);CHKERRQ(ierr); 1806 1807 B->assembled = PETSC_TRUE; 1808 1809 ierr = MatView_Private(B);CHKERRQ(ierr); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 1814 1815