1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 560 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 565 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 MatScalar *value = v,*ap,*aa = a->a,*bap; 602 603 PetscFunctionBegin; 604 if (roworiented) { 605 stepval = (n-1)*bs; 606 } else { 607 stepval = (m-1)*bs; 608 } 609 for (k=0; k<m; k++) { /* loop over added rows */ 610 row = im[k]; 611 if (row < 0) continue; 612 #if defined(PETSC_USE_BOPT_g) 613 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 614 #endif 615 rp = aj + ai[row]; 616 ap = aa + bs2*ai[row]; 617 rmax = imax[row]; 618 nrow = ailen[row]; 619 low = 0; 620 for (l=0; l<n; l++) { /* loop over added columns */ 621 if (in[l] < 0) continue; 622 #if defined(PETSC_USE_BOPT_g) 623 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 624 #endif 625 col = in[l]; 626 if (col < row) continue; /* ignore lower triangular block */ 627 if (roworiented) { 628 value = v + k*(stepval+bs)*bs + l*bs; 629 } else { 630 value = v + l*(stepval+bs)*bs + k*bs; 631 } 632 if (!sorted) low = 0; high = nrow; 633 while (high-low > 7) { 634 t = (low+high)/2; 635 if (rp[t] > col) high = t; 636 else low = t; 637 } 638 for (i=low; i<high; i++) { 639 if (rp[i] > col) break; 640 if (rp[i] == col) { 641 bap = ap + bs2*i; 642 if (roworiented) { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=ii; jj<bs2; jj+=bs) { 646 bap[jj] += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=ii; jj<bs2; jj+=bs) { 652 bap[jj] = *value++; 653 } 654 } 655 } 656 } else { 657 if (is == ADD_VALUES) { 658 for (ii=0; ii<bs; ii++,value+=stepval) { 659 for (jj=0; jj<bs; jj++) { 660 *bap++ += *value++; 661 } 662 } 663 } else { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=0; jj<bs; jj++) { 666 *bap++ = *value++; 667 } 668 } 669 } 670 } 671 goto noinsert2; 672 } 673 } 674 if (nonew == 1) goto noinsert2; 675 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 676 if (nrow >= rmax) { 677 /* there is no extra room in row, therefore enlarge */ 678 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 679 MatScalar *new_a; 680 681 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 682 683 /* malloc new storage space */ 684 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 685 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 686 new_j = (int*)(new_a + bs2*new_nz); 687 new_i = new_j + new_nz; 688 689 /* copy over old data into new slots */ 690 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 691 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 692 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 693 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 694 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 695 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 698 /* free up old matrix storage */ 699 ierr = PetscFree(a->a);CHKERRQ(ierr); 700 if (!a->singlemalloc) { 701 ierr = PetscFree(a->i);CHKERRQ(ierr); 702 ierr = PetscFree(a->j);CHKERRQ(ierr); 703 } 704 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 705 a->singlemalloc = PETSC_TRUE; 706 707 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 708 rmax = imax[row] = imax[row] + CHUNKSIZE; 709 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 710 a->s_maxnz += bs2*CHUNKSIZE; 711 a->reallocs++; 712 a->s_nz++; 713 } 714 N = nrow++ - 1; 715 /* shift up all the later entries in this row */ 716 for (ii=N; ii>=i; ii--) { 717 rp[ii+1] = rp[ii]; 718 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 719 } 720 if (N >= i) { 721 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 rp[i] = col; 724 bap = ap + bs2*i; 725 if (roworiented) { 726 for (ii=0; ii<bs; ii++,value+=stepval) { 727 for (jj=ii; jj<bs2; jj+=bs) { 728 bap[jj] = *value++; 729 } 730 } 731 } else { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=0; jj<bs; jj++) { 734 *bap++ = *value++; 735 } 736 } 737 } 738 noinsert2:; 739 low = i; 740 } 741 ailen[row] = nrow; 742 } 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 749 { 750 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 751 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 752 int m = A->m,*ip,N,*ailen = a->ilen; 753 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 754 MatScalar *aa = a->a,*ap; 755 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 756 PetscTruth flag; 757 #endif 758 759 PetscFunctionBegin; 760 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 761 762 if (m) rmax = ailen[0]; 763 for (i=1; i<mbs; i++) { 764 /* move each row back by the amount of empty slots (fshift) before it*/ 765 fshift += imax[i-1] - ailen[i-1]; 766 rmax = PetscMax(rmax,ailen[i]); 767 if (fshift) { 768 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 769 N = ailen[i]; 770 for (j=0; j<N; j++) { 771 ip[j-fshift] = ip[j]; 772 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 773 } 774 } 775 ai[i] = ai[i-1] + ailen[i-1]; 776 } 777 if (mbs) { 778 fshift += imax[mbs-1] - ailen[mbs-1]; 779 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 780 } 781 /* reset ilen and imax for each row */ 782 for (i=0; i<mbs; i++) { 783 ailen[i] = imax[i] = ai[i+1] - ai[i]; 784 } 785 a->s_nz = ai[mbs]; 786 787 /* diagonals may have moved, reset it */ 788 if (a->diag) { 789 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 790 } 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 792 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 794 a->reallocs); 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 796 a->reallocs = 0; 797 A->info.nz_unneeded = (PetscReal)fshift*bs2; 798 799 #if defined(PETSC_HAVE_SPOOLES) 800 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 801 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 802 #endif 803 #if defined(PETSC_HAVE_MUMPS) 804 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 805 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 806 #endif 807 808 PetscFunctionReturn(0); 809 } 810 811 /* 812 This function returns an array of flags which indicate the locations of contiguous 813 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 814 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 815 Assume: sizes should be long enough to hold all the values. 816 */ 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 819 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 820 { 821 int i,j,k,row; 822 PetscTruth flg; 823 824 PetscFunctionBegin; 825 for (i=0,j=0; i<n; j++) { 826 row = idx[i]; 827 if (row%bs!=0) { /* Not the begining of a block */ 828 sizes[j] = 1; 829 i++; 830 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 831 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 832 i++; 833 } else { /* Begining of the block, so check if the complete block exists */ 834 flg = PETSC_TRUE; 835 for (k=1; k<bs; k++) { 836 if (row+k != idx[i+k]) { /* break in the block */ 837 flg = PETSC_FALSE; 838 break; 839 } 840 } 841 if (flg == PETSC_TRUE) { /* No break in the bs */ 842 sizes[j] = bs; 843 i+= bs; 844 } else { 845 sizes[j] = 1; 846 i++; 847 } 848 } 849 } 850 *bs_max = j; 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 857 { 858 PetscFunctionBegin; 859 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 860 } 861 862 /* Only add/insert a(i,j) with i<=j (blocks). 863 Any a(i,j) with i>j input by user is ingored. 864 */ 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 868 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 869 { 870 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 871 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 872 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 873 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 874 int ridx,cidx,bs2=a->bs2,ierr; 875 MatScalar *ap,value,*aa=a->a,*bap; 876 877 PetscFunctionBegin; 878 879 for (k=0; k<m; k++) { /* loop over added rows */ 880 row = im[k]; /* row number */ 881 brow = row/bs; /* block row number */ 882 if (row < 0) continue; 883 #if defined(PETSC_USE_BOPT_g) 884 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 885 #endif 886 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 887 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 888 rmax = imax[brow]; /* maximum space allocated for this row */ 889 nrow = ailen[brow]; /* actual length of this row */ 890 low = 0; 891 892 for (l=0; l<n; l++) { /* loop over added columns */ 893 if (in[l] < 0) continue; 894 #if defined(PETSC_USE_BOPT_g) 895 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 896 #endif 897 col = in[l]; 898 bcol = col/bs; /* block col number */ 899 900 if (brow <= bcol){ 901 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 902 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 903 /* element value a(k,l) */ 904 if (roworiented) { 905 value = v[l + k*n]; 906 } else { 907 value = v[k + l*m]; 908 } 909 910 /* move pointer bap to a(k,l) quickly and add/insert value */ 911 if (!sorted) low = 0; high = nrow; 912 while (high-low > 7) { 913 t = (low+high)/2; 914 if (rp[t] > bcol) high = t; 915 else low = t; 916 } 917 for (i=low; i<high; i++) { 918 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 919 if (rp[i] > bcol) break; 920 if (rp[i] == bcol) { 921 bap = ap + bs2*i + bs*cidx + ridx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 925 if (brow == bcol && ridx < cidx){ 926 bap = ap + bs2*i + bs*ridx + cidx; 927 if (is == ADD_VALUES) *bap += value; 928 else *bap = value; 929 } 930 goto noinsert1; 931 } 932 } 933 934 if (nonew == 1) goto noinsert1; 935 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 936 if (nrow >= rmax) { 937 /* there is no extra room in row, therefore enlarge */ 938 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 939 MatScalar *new_a; 940 941 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 942 943 /* Malloc new storage space */ 944 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 945 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 946 new_j = (int*)(new_a + bs2*new_nz); 947 new_i = new_j + new_nz; 948 949 /* copy over old data into new slots */ 950 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 951 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 952 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 953 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 954 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 955 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 957 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 958 /* free up old matrix storage */ 959 ierr = PetscFree(a->a);CHKERRQ(ierr); 960 if (!a->singlemalloc) { 961 ierr = PetscFree(a->i);CHKERRQ(ierr); 962 ierr = PetscFree(a->j);CHKERRQ(ierr); 963 } 964 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 965 a->singlemalloc = PETSC_TRUE; 966 967 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 968 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 969 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 970 a->s_maxnz += bs2*CHUNKSIZE; 971 a->reallocs++; 972 a->s_nz++; 973 } 974 975 N = nrow++ - 1; 976 /* shift up all the later entries in this row */ 977 for (ii=N; ii>=i; ii--) { 978 rp[ii+1] = rp[ii]; 979 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 if (N>=i) { 982 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 983 } 984 rp[i] = bcol; 985 ap[bs2*i + bs*cidx + ridx] = value; 986 noinsert1:; 987 low = i; 988 /* } */ 989 } 990 } /* end of if .. if.. */ 991 } /* end of loop over added columns */ 992 ailen[brow] = nrow; 993 } /* end of loop over added rows */ 994 995 PetscFunctionReturn(0); 996 } 997 998 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 999 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 1000 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 1001 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1002 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1003 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1004 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1005 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1006 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1007 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1008 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1009 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1010 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1011 extern int MatZeroEntries_SeqSBAIJ(Mat); 1012 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1013 1014 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1021 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1028 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1029 1030 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1031 1032 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1038 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1039 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1040 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1048 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1049 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1050 1051 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1056 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1057 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1058 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1059 1060 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1065 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1066 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1067 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1068 1069 #ifdef HAVE_MatICCFactor 1070 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1073 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1074 { 1075 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1076 Mat outA; 1077 int ierr; 1078 PetscTruth row_identity,col_identity; 1079 1080 PetscFunctionBegin; 1081 /* 1082 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1083 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1084 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1085 if (!row_identity || !col_identity) { 1086 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1087 } 1088 */ 1089 1090 outA = inA; 1091 inA->factor = FACTOR_CHOLESKY; 1092 1093 if (!a->diag) { 1094 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1095 } 1096 /* 1097 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1098 for ILU(0) factorization with natural ordering 1099 */ 1100 switch (a->bs) { 1101 case 1: 1102 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1103 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1104 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1105 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1106 case 2: 1107 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1108 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1109 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1110 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1111 break; 1112 case 3: 1113 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1114 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1115 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1116 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1117 break; 1118 case 4: 1119 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1120 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1121 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1122 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1123 break; 1124 case 5: 1125 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1126 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1127 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1128 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1129 break; 1130 case 6: 1131 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1132 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1133 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1134 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1135 break; 1136 case 7: 1137 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1138 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1139 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1140 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1141 break; 1142 default: 1143 a->row = row; 1144 a->icol = col; 1145 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1146 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1147 1148 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1149 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1150 PetscLogObjectParent(inA,a->icol); 1151 1152 if (!a->solve_work) { 1153 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1154 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1155 } 1156 } 1157 1158 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1159 1160 PetscFunctionReturn(0); 1161 } 1162 #endif 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1166 int MatPrintHelp_SeqSBAIJ(Mat A) 1167 { 1168 static PetscTruth called = PETSC_FALSE; 1169 MPI_Comm comm = A->comm; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1174 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1175 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 EXTERN_C_BEGIN 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1182 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1183 { 1184 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1185 int i,nz,n; 1186 1187 PetscFunctionBegin; 1188 nz = baij->s_maxnz; 1189 n = mat->n; 1190 for (i=0; i<nz; i++) { 1191 baij->j[i] = indices[i]; 1192 } 1193 baij->s_nz = nz; 1194 for (i=0; i<n; i++) { 1195 baij->ilen[i] = baij->imax[i]; 1196 } 1197 1198 PetscFunctionReturn(0); 1199 } 1200 EXTERN_C_END 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1204 /*@ 1205 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1206 in the matrix. 1207 1208 Input Parameters: 1209 + mat - the SeqSBAIJ matrix 1210 - indices - the column indices 1211 1212 Level: advanced 1213 1214 Notes: 1215 This can be called if you have precomputed the nonzero structure of the 1216 matrix and want to provide it to the matrix object to improve the performance 1217 of the MatSetValues() operation. 1218 1219 You MUST have set the correct numbers of nonzeros per row in the call to 1220 MatCreateSeqSBAIJ(). 1221 1222 MUST be called before any calls to MatSetValues() 1223 1224 .seealso: MatCreateSeqSBAIJ 1225 @*/ 1226 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1227 { 1228 int ierr,(*f)(Mat,int *); 1229 1230 PetscFunctionBegin; 1231 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1232 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1233 if (f) { 1234 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1235 } else { 1236 SETERRQ(1,"Wrong type of matrix to set column indices"); 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1243 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1244 { 1245 int ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1254 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1255 { 1256 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1257 PetscFunctionBegin; 1258 *array = a->a; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1264 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1265 { 1266 PetscFunctionBegin; 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #include "petscblaslapack.h" 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1273 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1274 { 1275 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1276 int ierr,one=1,i,bs=y->bs,bs2,j; 1277 1278 PetscFunctionBegin; 1279 if (str == SAME_NONZERO_PATTERN) { 1280 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1281 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1282 if (y->xtoy && y->XtoY != X) { 1283 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1284 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1285 } 1286 if (!y->xtoy) { /* get xtoy */ 1287 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1288 y->XtoY = X; 1289 } 1290 bs2 = bs*bs; 1291 for (i=0; i<x->s_nz; i++) { 1292 j = 0; 1293 while (j < bs2){ 1294 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1295 j++; 1296 } 1297 } 1298 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1299 } else { 1300 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1301 } 1302 PetscFunctionReturn(0); 1303 } 1304 1305 /* -------------------------------------------------------------------*/ 1306 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1307 MatGetRow_SeqSBAIJ, 1308 MatRestoreRow_SeqSBAIJ, 1309 MatMult_SeqSBAIJ_N, 1310 MatMultAdd_SeqSBAIJ_N, 1311 MatMultTranspose_SeqSBAIJ, 1312 MatMultTransposeAdd_SeqSBAIJ, 1313 MatSolve_SeqSBAIJ_N, 1314 0, 1315 0, 1316 0, 1317 0, 1318 MatCholeskyFactor_SeqSBAIJ, 1319 MatRelax_SeqSBAIJ, 1320 MatTranspose_SeqSBAIJ, 1321 MatGetInfo_SeqSBAIJ, 1322 MatEqual_SeqSBAIJ, 1323 MatGetDiagonal_SeqSBAIJ, 1324 MatDiagonalScale_SeqSBAIJ, 1325 MatNorm_SeqSBAIJ, 1326 0, 1327 MatAssemblyEnd_SeqSBAIJ, 1328 0, 1329 MatSetOption_SeqSBAIJ, 1330 MatZeroEntries_SeqSBAIJ, 1331 MatZeroRows_SeqSBAIJ, 1332 0, 1333 0, 1334 MatCholeskyFactorSymbolic_SeqSBAIJ, 1335 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1336 MatSetUpPreallocation_SeqSBAIJ, 1337 0, 1338 MatICCFactorSymbolic_SeqSBAIJ, 1339 MatGetArray_SeqSBAIJ, 1340 MatRestoreArray_SeqSBAIJ, 1341 MatDuplicate_SeqSBAIJ, 1342 0, 1343 0, 1344 0, 1345 0, 1346 MatAXPY_SeqSBAIJ, 1347 MatGetSubMatrices_SeqSBAIJ, 1348 MatIncreaseOverlap_SeqSBAIJ, 1349 MatGetValues_SeqSBAIJ, 1350 0, 1351 MatPrintHelp_SeqSBAIJ, 1352 MatScale_SeqSBAIJ, 1353 0, 1354 0, 1355 0, 1356 MatGetBlockSize_SeqSBAIJ, 1357 MatGetRowIJ_SeqSBAIJ, 1358 MatRestoreRowIJ_SeqSBAIJ, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 MatSetValuesBlocked_SeqSBAIJ, 1366 MatGetSubMatrix_SeqSBAIJ, 1367 0, 1368 0, 1369 MatGetPetscMaps_Petsc, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 MatGetRowMax_SeqSBAIJ, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 #if !defined(PETSC_USE_COMPLEX) 1391 MatGetInertia_SeqSBAIJ 1392 #else 1393 0 1394 #endif 1395 }; 1396 1397 EXTERN_C_BEGIN 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1400 int MatStoreValues_SeqSBAIJ(Mat mat) 1401 { 1402 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1403 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1404 int ierr; 1405 1406 PetscFunctionBegin; 1407 if (aij->nonew != 1) { 1408 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1409 } 1410 1411 /* allocate space for values if not already there */ 1412 if (!aij->saved_values) { 1413 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1414 } 1415 1416 /* copy values over */ 1417 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 1422 EXTERN_C_BEGIN 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1425 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1426 { 1427 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1428 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1429 1430 PetscFunctionBegin; 1431 if (aij->nonew != 1) { 1432 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1433 } 1434 if (!aij->saved_values) { 1435 SETERRQ(1,"Must call MatStoreValues(A);first"); 1436 } 1437 1438 /* copy values over */ 1439 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 EXTERN_C_END 1443 1444 EXTERN_C_BEGIN 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1447 int MatCreate_SeqSBAIJ(Mat B) 1448 { 1449 Mat_SeqSBAIJ *b; 1450 int ierr,size; 1451 1452 PetscFunctionBegin; 1453 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1454 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1455 B->m = B->M = PetscMax(B->m,B->M); 1456 B->n = B->N = PetscMax(B->n,B->N); 1457 1458 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1459 B->data = (void*)b; 1460 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1461 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1462 B->ops->destroy = MatDestroy_SeqSBAIJ; 1463 B->ops->view = MatView_SeqSBAIJ; 1464 B->factor = 0; 1465 B->lupivotthreshold = 1.0; 1466 B->mapping = 0; 1467 b->row = 0; 1468 b->icol = 0; 1469 b->reallocs = 0; 1470 b->saved_values = 0; 1471 1472 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1473 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1474 1475 b->sorted = PETSC_FALSE; 1476 b->roworiented = PETSC_TRUE; 1477 b->nonew = 0; 1478 b->diag = 0; 1479 b->solve_work = 0; 1480 b->mult_work = 0; 1481 B->spptr = 0; 1482 b->keepzeroedrows = PETSC_FALSE; 1483 b->xtoy = 0; 1484 b->XtoY = 0; 1485 1486 b->inew = 0; 1487 b->jnew = 0; 1488 b->anew = 0; 1489 b->a2anew = 0; 1490 b->permute = PETSC_FALSE; 1491 1492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1493 "MatStoreValues_SeqSBAIJ", 1494 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1496 "MatRetrieveValues_SeqSBAIJ", 1497 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1499 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1500 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 EXTERN_C_END 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1507 /*@C 1508 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1509 compressed row) format. For good matrix assembly performance the 1510 user should preallocate the matrix storage by setting the parameter nz 1511 (or the array nnz). By setting these parameters accurately, performance 1512 during matrix assembly can be increased by more than a factor of 50. 1513 1514 Collective on Mat 1515 1516 Input Parameters: 1517 + A - the symmetric matrix 1518 . bs - size of block 1519 . nz - number of block nonzeros per block row (same for all rows) 1520 - nnz - array containing the number of block nonzeros in the upper triangular plus 1521 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1522 1523 Options Database Keys: 1524 . -mat_no_unroll - uses code that does not unroll the loops in the 1525 block calculations (much slower) 1526 . -mat_block_size - size of the blocks to use 1527 1528 Level: intermediate 1529 1530 Notes: 1531 Specify the preallocated storage with either nz or nnz (not both). 1532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1533 allocation. For additional details, see the users manual chapter on 1534 matrices. 1535 1536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1537 @*/ 1538 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1539 { 1540 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1541 int i,len,ierr,mbs,bs2; 1542 PetscTruth flg; 1543 int s_nz; 1544 1545 PetscFunctionBegin; 1546 B->preallocated = PETSC_TRUE; 1547 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1548 mbs = B->m/bs; 1549 bs2 = bs*bs; 1550 1551 if (mbs*bs != B->m) { 1552 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1553 } 1554 1555 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1556 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1557 if (nnz) { 1558 for (i=0; i<mbs; i++) { 1559 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1560 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1561 } 1562 } 1563 1564 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1565 if (!flg) { 1566 switch (bs) { 1567 case 1: 1568 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1569 B->ops->solve = MatSolve_SeqSBAIJ_1; 1570 B->ops->solves = MatSolves_SeqSBAIJ_1; 1571 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1572 B->ops->mult = MatMult_SeqSBAIJ_1; 1573 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1574 break; 1575 case 2: 1576 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1577 B->ops->solve = MatSolve_SeqSBAIJ_2; 1578 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1579 B->ops->mult = MatMult_SeqSBAIJ_2; 1580 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1581 break; 1582 case 3: 1583 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1584 B->ops->solve = MatSolve_SeqSBAIJ_3; 1585 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1586 B->ops->mult = MatMult_SeqSBAIJ_3; 1587 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1588 break; 1589 case 4: 1590 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1591 B->ops->solve = MatSolve_SeqSBAIJ_4; 1592 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1593 B->ops->mult = MatMult_SeqSBAIJ_4; 1594 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1595 break; 1596 case 5: 1597 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1598 B->ops->solve = MatSolve_SeqSBAIJ_5; 1599 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1600 B->ops->mult = MatMult_SeqSBAIJ_5; 1601 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1602 break; 1603 case 6: 1604 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1605 B->ops->solve = MatSolve_SeqSBAIJ_6; 1606 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1607 B->ops->mult = MatMult_SeqSBAIJ_6; 1608 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1609 break; 1610 case 7: 1611 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1612 B->ops->solve = MatSolve_SeqSBAIJ_7; 1613 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1614 B->ops->mult = MatMult_SeqSBAIJ_7; 1615 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1616 break; 1617 } 1618 } 1619 1620 b->mbs = mbs; 1621 b->nbs = mbs; 1622 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1623 if (!nnz) { 1624 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1625 else if (nz <= 0) nz = 1; 1626 for (i=0; i<mbs; i++) { 1627 b->imax[i] = nz; 1628 } 1629 nz = nz*mbs; /* total nz */ 1630 } else { 1631 nz = 0; 1632 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1633 } 1634 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1635 s_nz = nz; 1636 1637 /* allocate the matrix space */ 1638 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1639 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1640 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1641 b->j = (int*)(b->a + s_nz*bs2); 1642 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1643 b->i = b->j + s_nz; 1644 b->singlemalloc = PETSC_TRUE; 1645 1646 /* pointer to beginning of each row */ 1647 b->i[0] = 0; 1648 for (i=1; i<mbs+1; i++) { 1649 b->i[i] = b->i[i-1] + b->imax[i-1]; 1650 } 1651 1652 /* b->ilen will count nonzeros in each block row so far. */ 1653 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1654 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1655 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1656 1657 b->bs = bs; 1658 b->bs2 = bs2; 1659 b->s_nz = 0; 1660 b->s_maxnz = s_nz*bs2; 1661 1662 b->inew = 0; 1663 b->jnew = 0; 1664 b->anew = 0; 1665 b->a2anew = 0; 1666 b->permute = PETSC_FALSE; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatCreateSeqSBAIJ" 1673 /*@C 1674 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1675 compressed row) format. For good matrix assembly performance the 1676 user should preallocate the matrix storage by setting the parameter nz 1677 (or the array nnz). By setting these parameters accurately, performance 1678 during matrix assembly can be increased by more than a factor of 50. 1679 1680 Collective on MPI_Comm 1681 1682 Input Parameters: 1683 + comm - MPI communicator, set to PETSC_COMM_SELF 1684 . bs - size of block 1685 . m - number of rows, or number of columns 1686 . nz - number of block nonzeros per block row (same for all rows) 1687 - nnz - array containing the number of block nonzeros in the upper triangular plus 1688 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1689 1690 Output Parameter: 1691 . A - the symmetric matrix 1692 1693 Options Database Keys: 1694 . -mat_no_unroll - uses code that does not unroll the loops in the 1695 block calculations (much slower) 1696 . -mat_block_size - size of the blocks to use 1697 1698 Level: intermediate 1699 1700 Notes: 1701 1702 Specify the preallocated storage with either nz or nnz (not both). 1703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1704 allocation. For additional details, see the users manual chapter on 1705 matrices. 1706 1707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1708 @*/ 1709 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1710 { 1711 int ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1715 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1716 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1722 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1723 { 1724 Mat C; 1725 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1726 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1727 1728 PetscFunctionBegin; 1729 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1730 1731 *B = 0; 1732 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1733 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1734 c = (Mat_SeqSBAIJ*)C->data; 1735 1736 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1737 C->preallocated = PETSC_TRUE; 1738 C->factor = A->factor; 1739 c->row = 0; 1740 c->icol = 0; 1741 c->saved_values = 0; 1742 c->keepzeroedrows = a->keepzeroedrows; 1743 C->assembled = PETSC_TRUE; 1744 1745 c->bs = a->bs; 1746 c->bs2 = a->bs2; 1747 c->mbs = a->mbs; 1748 c->nbs = a->nbs; 1749 1750 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1751 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1752 for (i=0; i<mbs; i++) { 1753 c->imax[i] = a->imax[i]; 1754 c->ilen[i] = a->ilen[i]; 1755 } 1756 1757 /* allocate the matrix space */ 1758 c->singlemalloc = PETSC_TRUE; 1759 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1760 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1761 c->j = (int*)(c->a + nz*bs2); 1762 c->i = c->j + nz; 1763 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1764 if (mbs > 0) { 1765 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1766 if (cpvalues == MAT_COPY_VALUES) { 1767 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1770 } 1771 } 1772 1773 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1774 c->sorted = a->sorted; 1775 c->roworiented = a->roworiented; 1776 c->nonew = a->nonew; 1777 1778 if (a->diag) { 1779 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1780 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1781 for (i=0; i<mbs; i++) { 1782 c->diag[i] = a->diag[i]; 1783 } 1784 } else c->diag = 0; 1785 c->s_nz = a->s_nz; 1786 c->s_maxnz = a->s_maxnz; 1787 c->solve_work = 0; 1788 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1789 c->mult_work = 0; 1790 *B = C; 1791 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1798 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1799 { 1800 Mat_SeqSBAIJ *a; 1801 Mat B; 1802 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1803 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1804 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1805 int *masked,nmask,tmp,bs2,ishift; 1806 PetscScalar *aa; 1807 MPI_Comm comm = ((PetscObject)viewer)->comm; 1808 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 1809 PetscTruth flag; 1810 #endif 1811 1812 PetscFunctionBegin; 1813 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1814 bs2 = bs*bs; 1815 1816 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1817 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1818 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1819 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1820 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1821 M = header[1]; N = header[2]; nz = header[3]; 1822 1823 if (header[3] < 0) { 1824 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1825 } 1826 1827 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1828 1829 /* 1830 This code adds extra rows to make sure the number of rows is 1831 divisible by the blocksize 1832 */ 1833 mbs = M/bs; 1834 extra_rows = bs - M + bs*(mbs); 1835 if (extra_rows == bs) extra_rows = 0; 1836 else mbs++; 1837 if (extra_rows) { 1838 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1839 } 1840 1841 /* read in row lengths */ 1842 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1843 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1844 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1845 1846 /* read in column indices */ 1847 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1848 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1849 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1850 1851 /* loop over row lengths determining block row lengths */ 1852 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1853 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1854 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1855 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1856 masked = mask + mbs; 1857 rowcount = 0; nzcount = 0; 1858 for (i=0; i<mbs; i++) { 1859 nmask = 0; 1860 for (j=0; j<bs; j++) { 1861 kmax = rowlengths[rowcount]; 1862 for (k=0; k<kmax; k++) { 1863 tmp = jj[nzcount++]/bs; /* block col. index */ 1864 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1865 } 1866 rowcount++; 1867 } 1868 s_browlengths[i] += nmask; 1869 1870 /* zero out the mask elements we set */ 1871 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1872 } 1873 1874 /* create our matrix */ 1875 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1876 B = *A; 1877 a = (Mat_SeqSBAIJ*)B->data; 1878 1879 /* set matrix "i" values */ 1880 a->i[0] = 0; 1881 for (i=1; i<= mbs; i++) { 1882 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1883 a->ilen[i-1] = s_browlengths[i-1]; 1884 } 1885 a->s_nz = a->i[mbs]; 1886 1887 /* read in nonzero values */ 1888 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1889 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1890 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1891 1892 /* set "a" and "j" values into matrix */ 1893 nzcount = 0; jcount = 0; 1894 for (i=0; i<mbs; i++) { 1895 nzcountb = nzcount; 1896 nmask = 0; 1897 for (j=0; j<bs; j++) { 1898 kmax = rowlengths[i*bs+j]; 1899 for (k=0; k<kmax; k++) { 1900 tmp = jj[nzcount++]/bs; /* block col. index */ 1901 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1902 } 1903 } 1904 /* sort the masked values */ 1905 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1906 1907 /* set "j" values into matrix */ 1908 maskcount = 1; 1909 for (j=0; j<nmask; j++) { 1910 a->j[jcount++] = masked[j]; 1911 mask[masked[j]] = maskcount++; 1912 } 1913 1914 /* set "a" values into matrix */ 1915 ishift = bs2*a->i[i]; 1916 for (j=0; j<bs; j++) { 1917 kmax = rowlengths[i*bs+j]; 1918 for (k=0; k<kmax; k++) { 1919 tmp = jj[nzcountb]/bs ; /* block col. index */ 1920 if (tmp >= i){ 1921 block = mask[tmp] - 1; 1922 point = jj[nzcountb] - bs*tmp; 1923 idx = ishift + bs2*block + j + bs*point; 1924 a->a[idx] = aa[nzcountb]; 1925 } 1926 nzcountb++; 1927 } 1928 } 1929 /* zero out the mask elements we set */ 1930 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1931 } 1932 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1933 1934 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1935 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1936 ierr = PetscFree(aa);CHKERRQ(ierr); 1937 ierr = PetscFree(jj);CHKERRQ(ierr); 1938 ierr = PetscFree(mask);CHKERRQ(ierr); 1939 1940 B->assembled = PETSC_TRUE; 1941 #if defined(PETSC_HAVE_SPOOLES) 1942 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1943 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1944 #endif 1945 #if defined(PETSC_HAVE_MUMPS) 1946 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 1947 if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); } 1948 #endif 1949 ierr = MatView_Private(B);CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 EXTERN_C_END 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1956 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1957 { 1958 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1959 MatScalar *aa=a->a,*v,*v1; 1960 PetscScalar *x,*b,*t,sum,d; 1961 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1962 int nz,nz1,*vj,*vj1,i; 1963 1964 PetscFunctionBegin; 1965 its = its*lits; 1966 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1967 1968 if (bs > 1) 1969 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1970 1971 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1972 if (xx != bb) { 1973 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1974 } else { 1975 b = x; 1976 } 1977 1978 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1979 1980 if (flag & SOR_ZERO_INITIAL_GUESS) { 1981 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1982 for (i=0; i<m; i++) 1983 t[i] = b[i]; 1984 1985 for (i=0; i<m; i++){ 1986 d = *(aa + ai[i]); /* diag[i] */ 1987 v = aa + ai[i] + 1; 1988 vj = aj + ai[i] + 1; 1989 nz = ai[i+1] - ai[i] - 1; 1990 x[i] = omega*t[i]/d; 1991 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1992 PetscLogFlops(2*nz-1); 1993 } 1994 } 1995 1996 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1997 for (i=0; i<m; i++) 1998 t[i] = b[i]; 1999 2000 for (i=0; i<m-1; i++){ /* update rhs */ 2001 v = aa + ai[i] + 1; 2002 vj = aj + ai[i] + 1; 2003 nz = ai[i+1] - ai[i] - 1; 2004 while (nz--) t[*vj++] -= x[i]*(*v++); 2005 PetscLogFlops(2*nz-1); 2006 } 2007 for (i=m-1; i>=0; i--){ 2008 d = *(aa + ai[i]); 2009 v = aa + ai[i] + 1; 2010 vj = aj + ai[i] + 1; 2011 nz = ai[i+1] - ai[i] - 1; 2012 sum = t[i]; 2013 while (nz--) sum -= x[*vj++]*(*v++); 2014 PetscLogFlops(2*nz-1); 2015 x[i] = (1-omega)*x[i] + omega*sum/d; 2016 } 2017 } 2018 its--; 2019 } 2020 2021 while (its--) { 2022 /* 2023 forward sweep: 2024 for i=0,...,m-1: 2025 sum[i] = (b[i] - U(i,:)x )/d[i]; 2026 x[i] = (1-omega)x[i] + omega*sum[i]; 2027 b = b - x[i]*U^T(i,:); 2028 2029 */ 2030 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2031 for (i=0; i<m; i++) 2032 t[i] = b[i]; 2033 2034 for (i=0; i<m; i++){ 2035 d = *(aa + ai[i]); /* diag[i] */ 2036 v = aa + ai[i] + 1; v1=v; 2037 vj = aj + ai[i] + 1; vj1=vj; 2038 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2039 sum = t[i]; 2040 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2041 x[i] = (1-omega)*x[i] + omega*sum/d; 2042 while (nz--) t[*vj++] -= x[i]*(*v++); 2043 PetscLogFlops(4*nz-2); 2044 } 2045 } 2046 2047 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2048 /* 2049 backward sweep: 2050 b = b - x[i]*U^T(i,:), i=0,...,n-2 2051 for i=m-1,...,0: 2052 sum[i] = (b[i] - U(i,:)x )/d[i]; 2053 x[i] = (1-omega)x[i] + omega*sum[i]; 2054 */ 2055 for (i=0; i<m; i++) 2056 t[i] = b[i]; 2057 2058 for (i=0; i<m-1; i++){ /* update rhs */ 2059 v = aa + ai[i] + 1; 2060 vj = aj + ai[i] + 1; 2061 nz = ai[i+1] - ai[i] - 1; 2062 while (nz--) t[*vj++] -= x[i]*(*v++); 2063 PetscLogFlops(2*nz-1); 2064 } 2065 for (i=m-1; i>=0; i--){ 2066 d = *(aa + ai[i]); 2067 v = aa + ai[i] + 1; 2068 vj = aj + ai[i] + 1; 2069 nz = ai[i+1] - ai[i] - 1; 2070 sum = t[i]; 2071 while (nz--) sum -= x[*vj++]*(*v++); 2072 PetscLogFlops(2*nz-1); 2073 x[i] = (1-omega)*x[i] + omega*sum/d; 2074 } 2075 } 2076 } 2077 2078 ierr = PetscFree(t); CHKERRQ(ierr); 2079 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2080 if (bb != xx) { 2081 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2082 } 2083 2084 PetscFunctionReturn(0); 2085 } 2086 2087 2088 2089 2090 2091 2092