1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 #if defined(PETSC_HAVE_SPOOLES) 12 EXTERN int MatUseSpooles_SeqSBAIJ(Mat); 13 #endif 14 15 #define CHUNKSIZE 10 16 17 /* 18 Checks for missing diagonals 19 */ 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 22 int MatMissingDiagonal_SeqSBAIJ(Mat A) 23 { 24 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 25 int *diag,*jj = a->j,i,ierr; 26 27 PetscFunctionBegin; 28 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 29 diag = a->diag; 30 for (i=0; i<a->mbs; i++) { 31 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,mbs = a->mbs,ierr; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 47 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 48 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 49 PetscFunctionReturn(0); 50 } 51 52 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 57 { 58 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 59 60 PetscFunctionBegin; 61 if (ia) { 62 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 63 } 64 *nn = a->mbs; 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 70 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 71 { 72 PetscFunctionBegin; 73 if (!ia) PetscFunctionReturn(0); 74 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 75 /* PetscFunctionReturn(0); */ 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 80 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 81 { 82 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 83 84 PetscFunctionBegin; 85 *bs = sbaij->bs; 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 91 int MatDestroy_SeqSBAIJ(Mat A) 92 { 93 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 94 int ierr; 95 96 PetscFunctionBegin; 97 #if defined(PETSC_USE_LOG) 98 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 99 #endif 100 ierr = PetscFree(a->a);CHKERRQ(ierr); 101 if (!a->singlemalloc) { 102 ierr = PetscFree(a->i);CHKERRQ(ierr); 103 ierr = PetscFree(a->j);CHKERRQ(ierr); 104 } 105 if (a->row) { 106 ierr = ISDestroy(a->row);CHKERRQ(ierr); 107 } 108 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 109 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 110 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 113 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 116 if (a->inew){ 117 ierr = PetscFree(a->inew);CHKERRQ(ierr); 118 a->inew = 0; 119 } 120 ierr = PetscFree(a);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 126 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 127 { 128 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 129 130 PetscFunctionBegin; 131 switch (op) { 132 case MAT_ROW_ORIENTED: 133 a->roworiented = PETSC_TRUE; 134 break; 135 case MAT_COLUMN_ORIENTED: 136 a->roworiented = PETSC_FALSE; 137 break; 138 case MAT_COLUMNS_SORTED: 139 a->sorted = PETSC_TRUE; 140 break; 141 case MAT_COLUMNS_UNSORTED: 142 a->sorted = PETSC_FALSE; 143 break; 144 case MAT_KEEP_ZEROED_ROWS: 145 a->keepzeroedrows = PETSC_TRUE; 146 break; 147 case MAT_NO_NEW_NONZERO_LOCATIONS: 148 a->nonew = 1; 149 break; 150 case MAT_NEW_NONZERO_LOCATION_ERR: 151 a->nonew = -1; 152 break; 153 case MAT_NEW_NONZERO_ALLOCATION_ERR: 154 a->nonew = -2; 155 break; 156 case MAT_YES_NEW_NONZERO_LOCATIONS: 157 a->nonew = 0; 158 break; 159 case MAT_ROWS_SORTED: 160 case MAT_ROWS_UNSORTED: 161 case MAT_YES_NEW_DIAGONALS: 162 case MAT_IGNORE_OFF_PROC_ENTRIES: 163 case MAT_USE_HASH_TABLE: 164 case MAT_USE_SINGLE_PRECISION_SOLVES: 165 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 166 break; 167 case MAT_NO_NEW_DIAGONALS: 168 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 169 default: 170 SETERRQ(PETSC_ERR_SUP,"unknown option"); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 177 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 178 { 179 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 180 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 181 MatScalar *aa,*aa_i; 182 PetscScalar *v_i; 183 184 PetscFunctionBegin; 185 bs = a->bs; 186 ai = a->i; 187 aj = a->j; 188 aa = a->a; 189 bs2 = a->bs2; 190 191 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 192 193 bn = row/bs; /* Block number */ 194 bp = row % bs; /* Block position */ 195 M = ai[bn+1] - ai[bn]; 196 *ncols = bs*M; 197 198 if (v) { 199 *v = 0; 200 if (*ncols) { 201 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 202 for (i=0; i<M; i++) { /* for each block in the block row */ 203 v_i = *v + i*bs; 204 aa_i = aa + bs2*(ai[bn] + i); 205 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 206 } 207 } 208 } 209 210 if (cols) { 211 *cols = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 cols_i = *cols + i*bs; 216 itmp = bs*aj[ai[bn] + i]; 217 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 218 } 219 } 220 } 221 222 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 223 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 224 #ifdef column_search 225 v_i = *v + M*bs; 226 cols_i = *cols + M*bs; 227 for (i=0; i<bn; i++){ /* for each block row */ 228 M = ai[i+1] - ai[i]; 229 for (j=0; j<M; j++){ 230 itmp = aj[ai[i] + j]; /* block column value */ 231 if (itmp == bn){ 232 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 233 for (k=0; k<bs; k++) { 234 *cols_i++ = i*bs+k; 235 *v_i++ = aa_i[k]; 236 } 237 *ncols += bs; 238 break; 239 } 240 } 241 } 242 #endif 243 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 249 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 250 { 251 int ierr; 252 253 PetscFunctionBegin; 254 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 255 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 261 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 262 { 263 PetscFunctionBegin; 264 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 265 /* PetscFunctionReturn(0); */ 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 270 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 271 { 272 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 273 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 274 PetscScalar *aa; 275 FILE *file; 276 277 PetscFunctionBegin; 278 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 279 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 280 col_lens[0] = MAT_FILE_COOKIE; 281 282 col_lens[1] = A->m; 283 col_lens[2] = A->m; 284 col_lens[3] = a->s_nz*bs2; 285 286 /* store lengths of each row and write (including header) to file */ 287 count = 0; 288 for (i=0; i<a->mbs; i++) { 289 for (j=0; j<bs; j++) { 290 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 291 } 292 } 293 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 294 ierr = PetscFree(col_lens);CHKERRQ(ierr); 295 296 /* store column indices (zero start index) */ 297 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 298 count = 0; 299 for (i=0; i<a->mbs; i++) { 300 for (j=0; j<bs; j++) { 301 for (k=a->i[i]; k<a->i[i+1]; k++) { 302 for (l=0; l<bs; l++) { 303 jj[count++] = bs*a->j[k] + l; 304 } 305 } 306 } 307 } 308 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 309 ierr = PetscFree(jj);CHKERRQ(ierr); 310 311 /* store nonzero values */ 312 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 313 count = 0; 314 for (i=0; i<a->mbs; i++) { 315 for (j=0; j<bs; j++) { 316 for (k=a->i[i]; k<a->i[i+1]; k++) { 317 for (l=0; l<bs; l++) { 318 aa[count++] = a->a[bs2*k + l*bs + j]; 319 } 320 } 321 } 322 } 323 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 324 ierr = PetscFree(aa);CHKERRQ(ierr); 325 326 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 327 if (file) { 328 fprintf(file,"-matload_block_size %d\n",a->bs); 329 } 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 335 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 336 { 337 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 338 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 339 char *name; 340 PetscViewerFormat format; 341 342 PetscFunctionBegin; 343 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 344 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 345 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 346 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 347 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 348 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 349 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 350 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 351 for (i=0; i<a->mbs; i++) { 352 for (j=0; j<bs; j++) { 353 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 354 for (k=a->i[i]; k<a->i[i+1]; k++) { 355 for (l=0; l<bs; l++) { 356 #if defined(PETSC_USE_COMPLEX) 357 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 358 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 359 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 362 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 363 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 364 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 365 } 366 #else 367 if (a->a[bs2*k + l*bs + j] != 0.0) { 368 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 369 } 370 #endif 371 } 372 } 373 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 374 } 375 } 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 377 } else { 378 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 382 for (k=a->i[i]; k<a->i[i+1]; k++) { 383 for (l=0; l<bs; l++) { 384 #if defined(PETSC_USE_COMPLEX) 385 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 387 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 388 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 389 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 390 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } 394 #else 395 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 396 #endif 397 } 398 } 399 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 403 } 404 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 410 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 411 { 412 Mat A = (Mat) Aa; 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 415 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 416 MatScalar *aa; 417 MPI_Comm comm; 418 PetscViewer viewer; 419 420 PetscFunctionBegin; 421 /* 422 This is nasty. If this is called from an originally parallel matrix 423 then all processes call this,but only the first has the matrix so the 424 rest should return immediately. 425 */ 426 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 428 if (rank) PetscFunctionReturn(0); 429 430 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 431 432 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 433 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 434 435 /* loop over matrix elements drawing boxes */ 436 color = PETSC_DRAW_BLUE; 437 for (i=0,row=0; i<mbs; i++,row+=bs) { 438 for (j=a->i[i]; j<a->i[i+1]; j++) { 439 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 440 x_l = a->j[j]*bs; x_r = x_l + 1.0; 441 aa = a->a + j*bs2; 442 for (k=0; k<bs; k++) { 443 for (l=0; l<bs; l++) { 444 if (PetscRealPart(*aa++) >= 0.) continue; 445 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 446 } 447 } 448 } 449 } 450 color = PETSC_DRAW_CYAN; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) != 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 465 color = PETSC_DRAW_RED; 466 for (i=0,row=0; i<mbs; i++,row+=bs) { 467 for (j=a->i[i]; j<a->i[i+1]; j++) { 468 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 469 x_l = a->j[j]*bs; x_r = x_l + 1.0; 470 aa = a->a + j*bs2; 471 for (k=0; k<bs; k++) { 472 for (l=0; l<bs; l++) { 473 if (PetscRealPart(*aa++) <= 0.) continue; 474 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 484 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 485 { 486 int ierr; 487 PetscReal xl,yl,xr,yr,w,h; 488 PetscDraw draw; 489 PetscTruth isnull; 490 491 PetscFunctionBegin; 492 493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 495 496 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 497 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 498 xr += w; yr += h; xl = -w; yl = -h; 499 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 500 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 501 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatView_SeqSBAIJ" 507 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 508 { 509 int ierr; 510 PetscTruth issocket,isascii,isbinary,isdraw; 511 512 PetscFunctionBegin; 513 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 515 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 516 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 517 if (issocket) { 518 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 519 } else if (isascii){ 520 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 521 } else if (isbinary) { 522 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 523 } else if (isdraw) { 524 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 525 } else { 526 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 534 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 538 int *ai = a->i,*ailen = a->ilen; 539 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 540 MatScalar *ap,*aa = a->a,zero = 0.0; 541 542 PetscFunctionBegin; 543 for (k=0; k<m; k++) { /* loop over rows */ 544 row = im[k]; brow = row/bs; 545 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 546 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 547 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 548 nrow = ailen[brow]; 549 for (l=0; l<n; l++) { /* loop over columns */ 550 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 551 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 552 col = in[l] ; 553 bcol = col/bs; 554 cidx = col%bs; 555 ridx = row%bs; 556 high = nrow; 557 low = 0; /* assume unsorted */ 558 while (high-low > 5) { 559 t = (low+high)/2; 560 if (rp[t] > bcol) high = t; 561 else low = t; 562 } 563 for (i=low; i<high; i++) { 564 if (rp[i] > bcol) break; 565 if (rp[i] == bcol) { 566 *v++ = ap[bs2*i+bs*cidx+ridx]; 567 goto finished; 568 } 569 } 570 *v++ = zero; 571 finished:; 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 580 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 581 { 582 PetscFunctionBegin; 583 SETERRQ(1,"Function not yet written for SBAIJ format"); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 588 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 589 { 590 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 591 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 592 int m = A->m,*ip,N,*ailen = a->ilen; 593 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 594 MatScalar *aa = a->a,*ap; 595 #if defined(PETSC_HAVE_SPOOLES) 596 PetscTruth flag; 597 #endif 598 599 PetscFunctionBegin; 600 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 601 602 if (m) rmax = ailen[0]; 603 for (i=1; i<mbs; i++) { 604 /* move each row back by the amount of empty slots (fshift) before it*/ 605 fshift += imax[i-1] - ailen[i-1]; 606 rmax = PetscMax(rmax,ailen[i]); 607 if (fshift) { 608 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 609 N = ailen[i]; 610 for (j=0; j<N; j++) { 611 ip[j-fshift] = ip[j]; 612 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 613 } 614 } 615 ai[i] = ai[i-1] + ailen[i-1]; 616 } 617 if (mbs) { 618 fshift += imax[mbs-1] - ailen[mbs-1]; 619 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 620 } 621 /* reset ilen and imax for each row */ 622 for (i=0; i<mbs; i++) { 623 ailen[i] = imax[i] = ai[i+1] - ai[i]; 624 } 625 a->s_nz = ai[mbs]; 626 627 /* diagonals may have moved, reset it */ 628 if (a->diag) { 629 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 630 } 631 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 632 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 633 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 634 a->reallocs); 635 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 636 a->reallocs = 0; 637 A->info.nz_unneeded = (PetscReal)fshift*bs2; 638 639 #if defined(PETSC_HAVE_SPOOLES) 640 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 641 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 642 #endif 643 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This function returns an array of flags which indicate the locations of contiguous 649 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 650 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 651 Assume: sizes should be long enough to hold all the values. 652 */ 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 655 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 656 { 657 int i,j,k,row; 658 PetscTruth flg; 659 660 PetscFunctionBegin; 661 for (i=0,j=0; i<n; j++) { 662 row = idx[i]; 663 if (row%bs!=0) { /* Not the begining of a block */ 664 sizes[j] = 1; 665 i++; 666 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 667 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 668 i++; 669 } else { /* Begining of the block, so check if the complete block exists */ 670 flg = PETSC_TRUE; 671 for (k=1; k<bs; k++) { 672 if (row+k != idx[i+k]) { /* break in the block */ 673 flg = PETSC_FALSE; 674 break; 675 } 676 } 677 if (flg == PETSC_TRUE) { /* No break in the bs */ 678 sizes[j] = bs; 679 i+= bs; 680 } else { 681 sizes[j] = 1; 682 i++; 683 } 684 } 685 } 686 *bs_max = j; 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 692 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 693 { 694 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 695 int ierr,i,j,k,count,is_n,*is_idx,*rows; 696 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 697 PetscScalar zero = 0.0; 698 MatScalar *aa; 699 700 PetscFunctionBegin; 701 /* Make a copy of the IS and sort it */ 702 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 703 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 704 705 /* allocate memory for rows,sizes */ 706 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 707 sizes = rows + is_n; 708 709 /* initialize copy IS values to rows, and sort them */ 710 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 711 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 712 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 713 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 714 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 715 } else { 716 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 717 } 718 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 719 720 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 721 row = rows[j]; /* row to be zeroed */ 722 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 723 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 724 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 725 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 726 if (diag) { 727 if (sbaij->ilen[row/bs] > 0) { 728 sbaij->ilen[row/bs] = 1; 729 sbaij->j[sbaij->i[row/bs]] = row/bs; 730 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 731 } 732 /* Now insert all the diagoanl values for this bs */ 733 for (k=0; k<bs; k++) { 734 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 735 } 736 } else { /* (!diag) */ 737 sbaij->ilen[row/bs] = 0; 738 } /* end (!diag) */ 739 } else { /* (sizes[i] != bs), broken block */ 740 #if defined (PETSC_USE_DEBUG) 741 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 742 #endif 743 for (k=0; k<count; k++) { 744 aa[0] = zero; 745 aa+=bs; 746 } 747 if (diag) { 748 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 749 } 750 } 751 } 752 753 ierr = PetscFree(rows);CHKERRQ(ierr); 754 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 755 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 /* Only add/insert a(i,j) with i<=j (blocks). 760 Any a(i,j) with i>j input by user is ingored. 761 */ 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 765 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 766 { 767 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 768 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 769 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 770 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 771 int ridx,cidx,bs2=a->bs2,ierr; 772 MatScalar *ap,value,*aa=a->a,*bap; 773 774 PetscFunctionBegin; 775 776 for (k=0; k<m; k++) { /* loop over added rows */ 777 row = im[k]; /* row number */ 778 brow = row/bs; /* block row number */ 779 if (row < 0) continue; 780 #if defined(PETSC_USE_BOPT_g) 781 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 782 #endif 783 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 784 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 785 rmax = imax[brow]; /* maximum space allocated for this row */ 786 nrow = ailen[brow]; /* actual length of this row */ 787 low = 0; 788 789 for (l=0; l<n; l++) { /* loop over added columns */ 790 if (in[l] < 0) continue; 791 #if defined(PETSC_USE_BOPT_g) 792 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 793 #endif 794 col = in[l]; 795 bcol = col/bs; /* block col number */ 796 797 if (brow <= bcol){ 798 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 799 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 800 /* element value a(k,l) */ 801 if (roworiented) { 802 value = v[l + k*n]; 803 } else { 804 value = v[k + l*m]; 805 } 806 807 /* move pointer bap to a(k,l) quickly and add/insert value */ 808 if (!sorted) low = 0; high = nrow; 809 while (high-low > 7) { 810 t = (low+high)/2; 811 if (rp[t] > bcol) high = t; 812 else low = t; 813 } 814 for (i=low; i<high; i++) { 815 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 816 if (rp[i] > bcol) break; 817 if (rp[i] == bcol) { 818 bap = ap + bs2*i + bs*cidx + ridx; 819 if (is == ADD_VALUES) *bap += value; 820 else *bap = value; 821 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 822 if (brow == bcol && ridx < cidx){ 823 bap = ap + bs2*i + bs*ridx + cidx; 824 if (is == ADD_VALUES) *bap += value; 825 else *bap = value; 826 } 827 goto noinsert1; 828 } 829 } 830 831 if (nonew == 1) goto noinsert1; 832 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 833 if (nrow >= rmax) { 834 /* there is no extra room in row, therefore enlarge */ 835 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 836 MatScalar *new_a; 837 838 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 839 840 /* Malloc new storage space */ 841 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 842 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 843 new_j = (int*)(new_a + bs2*new_nz); 844 new_i = new_j + new_nz; 845 846 /* copy over old data into new slots */ 847 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 848 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 849 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 850 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 851 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 852 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 853 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 854 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 855 /* free up old matrix storage */ 856 ierr = PetscFree(a->a);CHKERRQ(ierr); 857 if (!a->singlemalloc) { 858 ierr = PetscFree(a->i);CHKERRQ(ierr); 859 ierr = PetscFree(a->j);CHKERRQ(ierr); 860 } 861 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 862 a->singlemalloc = PETSC_TRUE; 863 864 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 865 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 866 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 867 a->s_maxnz += bs2*CHUNKSIZE; 868 a->reallocs++; 869 a->s_nz++; 870 } 871 872 N = nrow++ - 1; 873 /* shift up all the later entries in this row */ 874 for (ii=N; ii>=i; ii--) { 875 rp[ii+1] = rp[ii]; 876 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 877 } 878 if (N>=i) { 879 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 880 } 881 rp[i] = bcol; 882 ap[bs2*i + bs*cidx + ridx] = value; 883 noinsert1:; 884 low = i; 885 /* } */ 886 } 887 } /* end of if .. if.. */ 888 } /* end of loop over added columns */ 889 ailen[brow] = nrow; 890 } /* end of loop over added rows */ 891 892 PetscFunctionReturn(0); 893 } 894 895 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 896 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 897 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 898 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 899 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 900 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 901 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 902 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 903 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 904 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 905 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 906 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 907 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 908 extern int MatZeroEntries_SeqSBAIJ(Mat); 909 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 910 911 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 912 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 913 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 914 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 915 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 916 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 917 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 918 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 919 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 920 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 921 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 922 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 923 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 924 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 925 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 926 927 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 928 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 929 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 930 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 931 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 932 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 933 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 934 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 935 936 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 940 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 941 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 942 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 944 945 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 946 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 947 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 948 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 949 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 950 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 951 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 952 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 953 954 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 955 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 956 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 957 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 958 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 959 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 960 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 961 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 962 963 #ifdef HAVE_MatICCFactor 964 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 967 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 968 { 969 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 970 Mat outA; 971 int ierr; 972 PetscTruth row_identity,col_identity; 973 974 PetscFunctionBegin; 975 /* 976 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 977 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 978 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 979 if (!row_identity || !col_identity) { 980 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 981 } 982 */ 983 984 outA = inA; 985 inA->factor = FACTOR_CHOLESKY; 986 987 if (!a->diag) { 988 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 989 } 990 /* 991 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 992 for ILU(0) factorization with natural ordering 993 */ 994 switch (a->bs) { 995 case 1: 996 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 997 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 998 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 999 case 2: 1000 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1001 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1002 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1003 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1004 break; 1005 case 3: 1006 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1007 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1008 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1009 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1010 break; 1011 case 4: 1012 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1013 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1014 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1015 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1016 break; 1017 case 5: 1018 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1019 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1020 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1021 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1022 break; 1023 case 6: 1024 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1025 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1026 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1027 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1028 break; 1029 case 7: 1030 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1031 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1032 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1033 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1034 break; 1035 default: 1036 a->row = row; 1037 a->icol = col; 1038 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1039 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1040 1041 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1042 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1043 PetscLogObjectParent(inA,a->icol); 1044 1045 if (!a->solve_work) { 1046 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1047 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1048 } 1049 } 1050 1051 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1052 1053 PetscFunctionReturn(0); 1054 } 1055 #endif 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1059 int MatPrintHelp_SeqSBAIJ(Mat A) 1060 { 1061 static PetscTruth called = PETSC_FALSE; 1062 MPI_Comm comm = A->comm; 1063 int ierr; 1064 1065 PetscFunctionBegin; 1066 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1067 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1068 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 EXTERN_C_BEGIN 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1075 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1076 { 1077 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1078 int i,nz,n; 1079 1080 PetscFunctionBegin; 1081 nz = baij->s_maxnz; 1082 n = mat->n; 1083 for (i=0; i<nz; i++) { 1084 baij->j[i] = indices[i]; 1085 } 1086 baij->s_nz = nz; 1087 for (i=0; i<n; i++) { 1088 baij->ilen[i] = baij->imax[i]; 1089 } 1090 1091 PetscFunctionReturn(0); 1092 } 1093 EXTERN_C_END 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1097 /*@ 1098 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1099 in the matrix. 1100 1101 Input Parameters: 1102 + mat - the SeqSBAIJ matrix 1103 - indices - the column indices 1104 1105 Level: advanced 1106 1107 Notes: 1108 This can be called if you have precomputed the nonzero structure of the 1109 matrix and want to provide it to the matrix object to improve the performance 1110 of the MatSetValues() operation. 1111 1112 You MUST have set the correct numbers of nonzeros per row in the call to 1113 MatCreateSeqSBAIJ(). 1114 1115 MUST be called before any calls to MatSetValues() 1116 1117 .seealso: MatCreateSeqSBAIJ 1118 @*/ 1119 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1120 { 1121 int ierr,(*f)(Mat,int *); 1122 1123 PetscFunctionBegin; 1124 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1125 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1126 if (f) { 1127 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1128 } else { 1129 SETERRQ(1,"Wrong type of matrix to set column indices"); 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1136 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1137 { 1138 int ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 /* -------------------------------------------------------------------*/ 1146 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1147 MatGetRow_SeqSBAIJ, 1148 MatRestoreRow_SeqSBAIJ, 1149 MatMult_SeqSBAIJ_N, 1150 MatMultAdd_SeqSBAIJ_N, 1151 MatMultTranspose_SeqSBAIJ, 1152 MatMultTransposeAdd_SeqSBAIJ, 1153 MatSolve_SeqSBAIJ_N, 1154 0, 1155 0, 1156 0, 1157 0, 1158 MatCholeskyFactor_SeqSBAIJ, 1159 MatRelax_SeqSBAIJ, 1160 MatTranspose_SeqSBAIJ, 1161 MatGetInfo_SeqSBAIJ, 1162 MatEqual_SeqSBAIJ, 1163 MatGetDiagonal_SeqSBAIJ, 1164 MatDiagonalScale_SeqSBAIJ, 1165 MatNorm_SeqSBAIJ, 1166 0, 1167 MatAssemblyEnd_SeqSBAIJ, 1168 0, 1169 MatSetOption_SeqSBAIJ, 1170 MatZeroEntries_SeqSBAIJ, 1171 MatZeroRows_SeqSBAIJ, 1172 0, 1173 0, 1174 MatCholeskyFactorSymbolic_SeqSBAIJ, 1175 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1176 MatSetUpPreallocation_SeqSBAIJ, 1177 0, 1178 MatICCFactorSymbolic_SeqSBAIJ, 1179 0, 1180 0, 1181 MatDuplicate_SeqSBAIJ, 1182 0, 1183 0, 1184 0, 1185 0, 1186 0, 1187 MatGetSubMatrices_SeqSBAIJ, 1188 MatIncreaseOverlap_SeqSBAIJ, 1189 MatGetValues_SeqSBAIJ, 1190 0, 1191 MatPrintHelp_SeqSBAIJ, 1192 MatScale_SeqSBAIJ, 1193 0, 1194 0, 1195 0, 1196 MatGetBlockSize_SeqSBAIJ, 1197 MatGetRowIJ_SeqSBAIJ, 1198 MatRestoreRowIJ_SeqSBAIJ, 1199 0, 1200 0, 1201 0, 1202 0, 1203 0, 1204 0, 1205 MatSetValuesBlocked_SeqSBAIJ, 1206 MatGetSubMatrix_SeqSBAIJ, 1207 0, 1208 0, 1209 MatGetPetscMaps_Petsc, 1210 0, 1211 0, 1212 0, 1213 0, 1214 0, 1215 0, 1216 MatGetRowMax_SeqSBAIJ}; 1217 1218 EXTERN_C_BEGIN 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1221 int MatStoreValues_SeqSBAIJ(Mat mat) 1222 { 1223 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1224 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 if (aij->nonew != 1) { 1229 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1230 } 1231 1232 /* allocate space for values if not already there */ 1233 if (!aij->saved_values) { 1234 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1235 } 1236 1237 /* copy values over */ 1238 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 EXTERN_C_END 1242 1243 EXTERN_C_BEGIN 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1246 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1247 { 1248 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1249 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1250 1251 PetscFunctionBegin; 1252 if (aij->nonew != 1) { 1253 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1254 } 1255 if (!aij->saved_values) { 1256 SETERRQ(1,"Must call MatStoreValues(A);first"); 1257 } 1258 1259 /* copy values over */ 1260 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 EXTERN_C_END 1264 1265 EXTERN_C_BEGIN 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1268 int MatCreate_SeqSBAIJ(Mat B) 1269 { 1270 Mat_SeqSBAIJ *b; 1271 int ierr,size; 1272 1273 PetscFunctionBegin; 1274 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1275 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1276 B->m = B->M = PetscMax(B->m,B->M); 1277 B->n = B->N = PetscMax(B->n,B->N); 1278 1279 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1280 B->data = (void*)b; 1281 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1282 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1283 B->ops->destroy = MatDestroy_SeqSBAIJ; 1284 B->ops->view = MatView_SeqSBAIJ; 1285 B->factor = 0; 1286 B->lupivotthreshold = 1.0; 1287 B->mapping = 0; 1288 b->row = 0; 1289 b->icol = 0; 1290 b->reallocs = 0; 1291 b->saved_values = 0; 1292 1293 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1294 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1295 1296 b->sorted = PETSC_FALSE; 1297 b->roworiented = PETSC_TRUE; 1298 b->nonew = 0; 1299 b->diag = 0; 1300 b->solve_work = 0; 1301 b->mult_work = 0; 1302 B->spptr = 0; 1303 b->keepzeroedrows = PETSC_FALSE; 1304 1305 b->inew = 0; 1306 b->jnew = 0; 1307 b->anew = 0; 1308 b->a2anew = 0; 1309 b->permute = PETSC_FALSE; 1310 1311 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1312 "MatStoreValues_SeqSBAIJ", 1313 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1315 "MatRetrieveValues_SeqSBAIJ", 1316 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1318 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1319 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 EXTERN_C_END 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1326 /*@C 1327 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1328 compressed row) format. For good matrix assembly performance the 1329 user should preallocate the matrix storage by setting the parameter nz 1330 (or the array nnz). By setting these parameters accurately, performance 1331 during matrix assembly can be increased by more than a factor of 50. 1332 1333 Collective on Mat 1334 1335 Input Parameters: 1336 + A - the symmetric matrix 1337 . bs - size of block 1338 . nz - number of block nonzeros per block row (same for all rows) 1339 - nnz - array containing the number of block nonzeros in the upper triangular plus 1340 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1341 1342 Options Database Keys: 1343 . -mat_no_unroll - uses code that does not unroll the loops in the 1344 block calculations (much slower) 1345 . -mat_block_size - size of the blocks to use 1346 1347 Level: intermediate 1348 1349 Notes: 1350 Specify the preallocated storage with either nz or nnz (not both). 1351 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1352 allocation. For additional details, see the users manual chapter on 1353 matrices. 1354 1355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1356 @*/ 1357 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1358 { 1359 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1360 int i,len,ierr,mbs,bs2; 1361 PetscTruth flg; 1362 int s_nz; 1363 1364 PetscFunctionBegin; 1365 B->preallocated = PETSC_TRUE; 1366 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1367 mbs = B->m/bs; 1368 bs2 = bs*bs; 1369 1370 if (mbs*bs != B->m) { 1371 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1372 } 1373 1374 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1375 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1376 if (nnz) { 1377 for (i=0; i<mbs; i++) { 1378 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1379 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1380 } 1381 } 1382 1383 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1384 if (!flg) { 1385 switch (bs) { 1386 case 1: 1387 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1388 B->ops->solve = MatSolve_SeqSBAIJ_1; 1389 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1390 B->ops->mult = MatMult_SeqSBAIJ_1; 1391 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1392 break; 1393 case 2: 1394 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1395 B->ops->solve = MatSolve_SeqSBAIJ_2; 1396 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1397 B->ops->mult = MatMult_SeqSBAIJ_2; 1398 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1399 break; 1400 case 3: 1401 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1402 B->ops->solve = MatSolve_SeqSBAIJ_3; 1403 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1404 B->ops->mult = MatMult_SeqSBAIJ_3; 1405 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1406 break; 1407 case 4: 1408 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1409 B->ops->solve = MatSolve_SeqSBAIJ_4; 1410 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1411 B->ops->mult = MatMult_SeqSBAIJ_4; 1412 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1413 break; 1414 case 5: 1415 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1416 B->ops->solve = MatSolve_SeqSBAIJ_5; 1417 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1418 B->ops->mult = MatMult_SeqSBAIJ_5; 1419 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1420 break; 1421 case 6: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1423 B->ops->solve = MatSolve_SeqSBAIJ_6; 1424 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1425 B->ops->mult = MatMult_SeqSBAIJ_6; 1426 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1427 break; 1428 case 7: 1429 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1430 B->ops->solve = MatSolve_SeqSBAIJ_7; 1431 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1432 B->ops->mult = MatMult_SeqSBAIJ_7; 1433 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1434 break; 1435 } 1436 } 1437 1438 b->mbs = mbs; 1439 b->nbs = mbs; 1440 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1441 if (!nnz) { 1442 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1443 else if (nz <= 0) nz = 1; 1444 for (i=0; i<mbs; i++) { 1445 b->imax[i] = nz; 1446 } 1447 nz = nz*mbs; /* total nz */ 1448 } else { 1449 nz = 0; 1450 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1451 } 1452 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1453 s_nz = nz; 1454 1455 /* allocate the matrix space */ 1456 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1457 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1458 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1459 b->j = (int*)(b->a + s_nz*bs2); 1460 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1461 b->i = b->j + s_nz; 1462 b->singlemalloc = PETSC_TRUE; 1463 1464 /* pointer to beginning of each row */ 1465 b->i[0] = 0; 1466 for (i=1; i<mbs+1; i++) { 1467 b->i[i] = b->i[i-1] + b->imax[i-1]; 1468 } 1469 1470 /* b->ilen will count nonzeros in each block row so far. */ 1471 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1472 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1473 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1474 1475 b->bs = bs; 1476 b->bs2 = bs2; 1477 b->s_nz = 0; 1478 b->s_maxnz = s_nz*bs2; 1479 1480 b->inew = 0; 1481 b->jnew = 0; 1482 b->anew = 0; 1483 b->a2anew = 0; 1484 b->permute = PETSC_FALSE; 1485 PetscFunctionReturn(0); 1486 } 1487 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatCreateSeqSBAIJ" 1491 /*@C 1492 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1493 compressed row) format. For good matrix assembly performance the 1494 user should preallocate the matrix storage by setting the parameter nz 1495 (or the array nnz). By setting these parameters accurately, performance 1496 during matrix assembly can be increased by more than a factor of 50. 1497 1498 Collective on MPI_Comm 1499 1500 Input Parameters: 1501 + comm - MPI communicator, set to PETSC_COMM_SELF 1502 . bs - size of block 1503 . m - number of rows, or number of columns 1504 . nz - number of block nonzeros per block row (same for all rows) 1505 - nnz - array containing the number of block nonzeros in the upper triangular plus 1506 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1507 1508 Output Parameter: 1509 . A - the symmetric matrix 1510 1511 Options Database Keys: 1512 . -mat_no_unroll - uses code that does not unroll the loops in the 1513 block calculations (much slower) 1514 . -mat_block_size - size of the blocks to use 1515 1516 Level: intermediate 1517 1518 Notes: 1519 1520 Specify the preallocated storage with either nz or nnz (not both). 1521 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1522 allocation. For additional details, see the users manual chapter on 1523 matrices. 1524 1525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1526 @*/ 1527 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1528 { 1529 int ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1533 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1534 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1540 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1541 { 1542 Mat C; 1543 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1544 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1545 1546 PetscFunctionBegin; 1547 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1548 1549 *B = 0; 1550 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1551 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1552 c = (Mat_SeqSBAIJ*)C->data; 1553 1554 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1555 C->preallocated = PETSC_TRUE; 1556 C->factor = A->factor; 1557 c->row = 0; 1558 c->icol = 0; 1559 c->saved_values = 0; 1560 c->keepzeroedrows = a->keepzeroedrows; 1561 C->assembled = PETSC_TRUE; 1562 1563 c->bs = a->bs; 1564 c->bs2 = a->bs2; 1565 c->mbs = a->mbs; 1566 c->nbs = a->nbs; 1567 1568 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1569 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1570 for (i=0; i<mbs; i++) { 1571 c->imax[i] = a->imax[i]; 1572 c->ilen[i] = a->ilen[i]; 1573 } 1574 1575 /* allocate the matrix space */ 1576 c->singlemalloc = PETSC_TRUE; 1577 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1578 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1579 c->j = (int*)(c->a + nz*bs2); 1580 c->i = c->j + nz; 1581 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1582 if (mbs > 0) { 1583 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1584 if (cpvalues == MAT_COPY_VALUES) { 1585 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1588 } 1589 } 1590 1591 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1592 c->sorted = a->sorted; 1593 c->roworiented = a->roworiented; 1594 c->nonew = a->nonew; 1595 1596 if (a->diag) { 1597 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1598 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1599 for (i=0; i<mbs; i++) { 1600 c->diag[i] = a->diag[i]; 1601 } 1602 } else c->diag = 0; 1603 c->s_nz = a->s_nz; 1604 c->s_maxnz = a->s_maxnz; 1605 c->solve_work = 0; 1606 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1607 c->mult_work = 0; 1608 *B = C; 1609 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1616 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1617 { 1618 Mat_SeqSBAIJ *a; 1619 Mat B; 1620 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1621 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1622 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1623 int *masked,nmask,tmp,bs2,ishift; 1624 PetscScalar *aa; 1625 MPI_Comm comm = ((PetscObject)viewer)->comm; 1626 1627 PetscFunctionBegin; 1628 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1629 bs2 = bs*bs; 1630 1631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1632 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1633 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1634 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1635 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1636 M = header[1]; N = header[2]; nz = header[3]; 1637 1638 if (header[3] < 0) { 1639 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1640 } 1641 1642 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1643 1644 /* 1645 This code adds extra rows to make sure the number of rows is 1646 divisible by the blocksize 1647 */ 1648 mbs = M/bs; 1649 extra_rows = bs - M + bs*(mbs); 1650 if (extra_rows == bs) extra_rows = 0; 1651 else mbs++; 1652 if (extra_rows) { 1653 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1654 } 1655 1656 /* read in row lengths */ 1657 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1658 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1659 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1660 1661 /* read in column indices */ 1662 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1663 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1664 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1665 1666 /* loop over row lengths determining block row lengths */ 1667 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1668 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1669 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1670 masked = mask + mbs; 1671 rowcount = 0; nzcount = 0; 1672 for (i=0; i<mbs; i++) { 1673 nmask = 0; 1674 for (j=0; j<bs; j++) { 1675 kmax = rowlengths[rowcount]; 1676 for (k=0; k<kmax; k++) { 1677 tmp = jj[nzcount++]/bs; /* block col. index */ 1678 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1679 } 1680 rowcount++; 1681 } 1682 s_browlengths[i] += nmask; 1683 1684 /* zero out the mask elements we set */ 1685 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1686 } 1687 1688 /* create our matrix */ 1689 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1690 B = *A; 1691 a = (Mat_SeqSBAIJ*)B->data; 1692 1693 /* set matrix "i" values */ 1694 a->i[0] = 0; 1695 for (i=1; i<= mbs; i++) { 1696 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1697 a->ilen[i-1] = s_browlengths[i-1]; 1698 } 1699 a->s_nz = a->i[mbs]; 1700 1701 /* read in nonzero values */ 1702 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1703 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1704 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1705 1706 /* set "a" and "j" values into matrix */ 1707 nzcount = 0; jcount = 0; 1708 for (i=0; i<mbs; i++) { 1709 nzcountb = nzcount; 1710 nmask = 0; 1711 for (j=0; j<bs; j++) { 1712 kmax = rowlengths[i*bs+j]; 1713 for (k=0; k<kmax; k++) { 1714 tmp = jj[nzcount++]/bs; /* block col. index */ 1715 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1716 } 1717 } 1718 /* sort the masked values */ 1719 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1720 1721 /* set "j" values into matrix */ 1722 maskcount = 1; 1723 for (j=0; j<nmask; j++) { 1724 a->j[jcount++] = masked[j]; 1725 mask[masked[j]] = maskcount++; 1726 } 1727 1728 /* set "a" values into matrix */ 1729 ishift = bs2*a->i[i]; 1730 for (j=0; j<bs; j++) { 1731 kmax = rowlengths[i*bs+j]; 1732 for (k=0; k<kmax; k++) { 1733 tmp = jj[nzcountb]/bs ; /* block col. index */ 1734 if (tmp >= i){ 1735 block = mask[tmp] - 1; 1736 point = jj[nzcountb] - bs*tmp; 1737 idx = ishift + bs2*block + j + bs*point; 1738 a->a[idx] = aa[nzcountb]; 1739 } 1740 nzcountb++; 1741 } 1742 } 1743 /* zero out the mask elements we set */ 1744 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1745 } 1746 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1747 1748 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1749 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1750 ierr = PetscFree(aa);CHKERRQ(ierr); 1751 ierr = PetscFree(jj);CHKERRQ(ierr); 1752 ierr = PetscFree(mask);CHKERRQ(ierr); 1753 1754 B->assembled = PETSC_TRUE; 1755 ierr = MatView_Private(B);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 EXTERN_C_END 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1762 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1763 { 1764 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1765 MatScalar *aa=a->a,*v,*v1; 1766 PetscScalar *x,*b,*t,sum,d; 1767 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1768 int nz,nz1,*vj,*vj1,i; 1769 1770 PetscFunctionBegin; 1771 its = its*lits; 1772 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1773 1774 if (bs > 1) 1775 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1776 1777 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1778 if (xx != bb) { 1779 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1780 } else { 1781 b = x; 1782 } 1783 1784 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1785 1786 if (flag & SOR_ZERO_INITIAL_GUESS) { 1787 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1788 for (i=0; i<m; i++) 1789 t[i] = b[i]; 1790 1791 for (i=0; i<m; i++){ 1792 d = *(aa + ai[i]); /* diag[i] */ 1793 v = aa + ai[i] + 1; 1794 vj = aj + ai[i] + 1; 1795 nz = ai[i+1] - ai[i] - 1; 1796 x[i] = omega*t[i]/d; 1797 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1798 PetscLogFlops(2*nz-1); 1799 } 1800 } 1801 1802 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1803 for (i=0; i<m; i++) 1804 t[i] = b[i]; 1805 1806 for (i=0; i<m-1; i++){ /* update rhs */ 1807 v = aa + ai[i] + 1; 1808 vj = aj + ai[i] + 1; 1809 nz = ai[i+1] - ai[i] - 1; 1810 while (nz--) t[*vj++] -= x[i]*(*v++); 1811 PetscLogFlops(2*nz-1); 1812 } 1813 for (i=m-1; i>=0; i--){ 1814 d = *(aa + ai[i]); 1815 v = aa + ai[i] + 1; 1816 vj = aj + ai[i] + 1; 1817 nz = ai[i+1] - ai[i] - 1; 1818 sum = t[i]; 1819 while (nz--) sum -= x[*vj++]*(*v++); 1820 PetscLogFlops(2*nz-1); 1821 x[i] = (1-omega)*x[i] + omega*sum/d; 1822 } 1823 } 1824 its--; 1825 } 1826 1827 while (its--) { 1828 /* 1829 forward sweep: 1830 for i=0,...,m-1: 1831 sum[i] = (b[i] - U(i,:)x )/d[i]; 1832 x[i] = (1-omega)x[i] + omega*sum[i]; 1833 b = b - x[i]*U^T(i,:); 1834 1835 */ 1836 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1837 for (i=0; i<m; i++) 1838 t[i] = b[i]; 1839 1840 for (i=0; i<m; i++){ 1841 d = *(aa + ai[i]); /* diag[i] */ 1842 v = aa + ai[i] + 1; v1=v; 1843 vj = aj + ai[i] + 1; vj1=vj; 1844 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1845 sum = t[i]; 1846 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1847 x[i] = (1-omega)*x[i] + omega*sum/d; 1848 while (nz--) t[*vj++] -= x[i]*(*v++); 1849 PetscLogFlops(4*nz-2); 1850 } 1851 } 1852 1853 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1854 /* 1855 backward sweep: 1856 b = b - x[i]*U^T(i,:), i=0,...,n-2 1857 for i=m-1,...,0: 1858 sum[i] = (b[i] - U(i,:)x )/d[i]; 1859 x[i] = (1-omega)x[i] + omega*sum[i]; 1860 */ 1861 for (i=0; i<m; i++) 1862 t[i] = b[i]; 1863 1864 for (i=0; i<m-1; i++){ /* update rhs */ 1865 v = aa + ai[i] + 1; 1866 vj = aj + ai[i] + 1; 1867 nz = ai[i+1] - ai[i] - 1; 1868 while (nz--) t[*vj++] -= x[i]*(*v++); 1869 PetscLogFlops(2*nz-1); 1870 } 1871 for (i=m-1; i>=0; i--){ 1872 d = *(aa + ai[i]); 1873 v = aa + ai[i] + 1; 1874 vj = aj + ai[i] + 1; 1875 nz = ai[i+1] - ai[i] - 1; 1876 sum = t[i]; 1877 while (nz--) sum -= x[*vj++]*(*v++); 1878 PetscLogFlops(2*nz-1); 1879 x[i] = (1-omega)*x[i] + omega*sum/d; 1880 } 1881 } 1882 } 1883 1884 ierr = PetscFree(t); CHKERRQ(ierr); 1885 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1886 if (bb != xx) { 1887 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1888 } 1889 1890 PetscFunctionReturn(0); 1891 } 1892 1893 1894 1895 1896 1897 1898