1 /*$Id: baij.c,v 1.229 2001/06/21 23:47:10 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 #if defined(PETSC_USE_MAT_SINGLE) 216 if (a->bs==4) { 217 PetscTruth sse_enabled; 218 int ierr; 219 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 220 if (sse_enabled) { 221 singleprecisionsolves = PETSC_TRUE; 222 } else { 223 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 224 } 225 } else { 226 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 227 } 228 #else 229 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 230 #endif 231 break; 232 default: 233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 241 { 242 PetscFunctionBegin; 243 if (m) *m = 0; 244 if (n) *n = A->m; 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatGetRow_SeqBAIJ" 250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 251 { 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 254 MatScalar *aa,*aa_i; 255 Scalar *v_i; 256 257 PetscFunctionBegin; 258 bs = a->bs; 259 ai = a->i; 260 aj = a->j; 261 aa = a->a; 262 bs2 = a->bs2; 263 264 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 265 266 bn = row/bs; /* Block number */ 267 bp = row % bs; /* Block Position */ 268 M = ai[bn+1] - ai[bn]; 269 *nz = bs*M; 270 271 if (v) { 272 *v = 0; 273 if (*nz) { 274 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 275 for (i=0; i<M; i++) { /* for each block in the block row */ 276 v_i = *v + i*bs; 277 aa_i = aa + bs2*(ai[bn] + i); 278 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 279 } 280 } 281 } 282 283 if (idx) { 284 *idx = 0; 285 if (*nz) { 286 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 287 for (i=0; i<M; i++) { /* for each block in the block row */ 288 idx_i = *idx + i*bs; 289 itmp = bs*aj[ai[bn] + i]; 290 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 291 } 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 300 { 301 int ierr; 302 303 PetscFunctionBegin; 304 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 305 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatTranspose_SeqBAIJ" 311 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 312 { 313 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 314 Mat C; 315 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 316 int *rows,*cols,bs2=a->bs2; 317 Scalar *array; 318 319 PetscFunctionBegin; 320 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 321 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 322 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 323 324 #if defined(PETSC_USE_MAT_SINGLE) 325 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 326 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 327 #else 328 array = a->a; 329 #endif 330 331 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 332 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 333 ierr = PetscFree(col);CHKERRQ(ierr); 334 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 335 cols = rows + bs; 336 for (i=0; i<mbs; i++) { 337 cols[0] = i*bs; 338 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 339 len = ai[i+1] - ai[i]; 340 for (j=0; j<len; j++) { 341 rows[0] = (*aj++)*bs; 342 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 343 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 344 array += bs2; 345 } 346 } 347 ierr = PetscFree(rows);CHKERRQ(ierr); 348 #if defined(PETSC_USE_MAT_SINGLE) 349 ierr = PetscFree(array); 350 #endif 351 352 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 355 if (B) { 356 *B = C; 357 } else { 358 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = A->m; 378 col_lens[2] = A->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 443 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 PetscViewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = PETSC_DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = PETSC_DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = PETSC_DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 569 { 570 int ierr; 571 PetscReal xl,yl,xr,yr,w,h; 572 PetscDraw draw; 573 PetscTruth isnull; 574 575 PetscFunctionBegin; 576 577 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 578 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 579 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 581 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 582 xr += w; yr += h; xl = -w; yl = -h; 583 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 584 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatView_SeqBAIJ" 591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 592 { 593 int ierr; 594 PetscTruth issocket,isascii,isbinary,isdraw; 595 596 PetscFunctionBegin; 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 if (issocket) { 602 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 603 } else if (isascii){ 604 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 605 } else if (isbinary) { 606 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 607 } else if (isdraw) { 608 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 609 } else { 610 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_SeqBAIJ" 618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 619 { 620 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 621 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 622 int *ai = a->i,*ailen = a->ilen; 623 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 624 MatScalar *ap,*aa = a->a,zero = 0.0; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over rows */ 628 row = im[k]; brow = row/bs; 629 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 630 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 631 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 632 nrow = ailen[brow]; 633 for (l=0; l<n; l++) { /* loop over columns */ 634 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 635 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 636 col = in[l] ; 637 bcol = col/bs; 638 cidx = col%bs; 639 ridx = row%bs; 640 high = nrow; 641 low = 0; /* assume unsorted */ 642 while (high-low > 5) { 643 t = (low+high)/2; 644 if (rp[t] > bcol) high = t; 645 else low = t; 646 } 647 for (i=low; i<high; i++) { 648 if (rp[i] > bcol) break; 649 if (rp[i] == bcol) { 650 *v++ = ap[bs2*i+bs*cidx+ridx]; 651 goto finished; 652 } 653 } 654 *v++ = zero; 655 finished:; 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #if defined(PETSC_USE_MAT_SINGLE) 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 665 { 666 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 667 int ierr,i,N = m*n*b->bs2; 668 MatScalar *vsingle; 669 670 PetscFunctionBegin; 671 if (N > b->setvalueslen) { 672 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 673 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 674 b->setvalueslen = N; 675 } 676 vsingle = b->setvaluescopy; 677 for (i=0; i<N; i++) { 678 vsingle[i] = v[i]; 679 } 680 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 #endif 684 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 692 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 693 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 694 PetscTruth roworiented=a->roworiented; 695 MatScalar *value = v,*ap,*aa = a->a,*bap; 696 697 PetscFunctionBegin; 698 if (roworiented) { 699 stepval = (n-1)*bs; 700 } else { 701 stepval = (m-1)*bs; 702 } 703 for (k=0; k<m; k++) { /* loop over added rows */ 704 row = im[k]; 705 if (row < 0) continue; 706 #if defined(PETSC_USE_BOPT_g) 707 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 708 #endif 709 rp = aj + ai[row]; 710 ap = aa + bs2*ai[row]; 711 rmax = imax[row]; 712 nrow = ailen[row]; 713 low = 0; 714 for (l=0; l<n; l++) { /* loop over added columns */ 715 if (in[l] < 0) continue; 716 #if defined(PETSC_USE_BOPT_g) 717 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 718 #endif 719 col = in[l]; 720 if (roworiented) { 721 value = v + k*(stepval+bs)*bs + l*bs; 722 } else { 723 value = v + l*(stepval+bs)*bs + k*bs; 724 } 725 if (!sorted) low = 0; high = nrow; 726 while (high-low > 7) { 727 t = (low+high)/2; 728 if (rp[t] > col) high = t; 729 else low = t; 730 } 731 for (i=low; i<high; i++) { 732 if (rp[i] > col) break; 733 if (rp[i] == col) { 734 bap = ap + bs2*i; 735 if (roworiented) { 736 if (is == ADD_VALUES) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] += *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] = *value++; 746 } 747 } 748 } 749 } else { 750 if (is == ADD_VALUES) { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=0; jj<bs; jj++) { 753 *bap++ += *value++; 754 } 755 } 756 } else { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ = *value++; 760 } 761 } 762 } 763 } 764 goto noinsert2; 765 } 766 } 767 if (nonew == 1) goto noinsert2; 768 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 769 if (nrow >= rmax) { 770 /* there is no extra room in row, therefore enlarge */ 771 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 772 MatScalar *new_a; 773 774 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 776 /* malloc new storage space */ 777 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 778 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 779 new_j = (int*)(new_a + bs2*new_nz); 780 new_i = new_j + new_nz; 781 782 /* copy over old data into new slots */ 783 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 784 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 785 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 786 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 787 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 790 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 791 /* free up old matrix storage */ 792 ierr = PetscFree(a->a);CHKERRQ(ierr); 793 if (!a->singlemalloc) { 794 ierr = PetscFree(a->i);CHKERRQ(ierr); 795 ierr = PetscFree(a->j);CHKERRQ(ierr); 796 } 797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 798 a->singlemalloc = PETSC_TRUE; 799 800 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 801 rmax = imax[row] = imax[row] + CHUNKSIZE; 802 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 803 a->maxnz += bs2*CHUNKSIZE; 804 a->reallocs++; 805 a->nz++; 806 } 807 N = nrow++ - 1; 808 /* shift up all the later entries in this row */ 809 for (ii=N; ii>=i; ii--) { 810 rp[ii+1] = rp[ii]; 811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 if (N >= i) { 814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 815 } 816 rp[i] = col; 817 bap = ap + bs2*i; 818 if (roworiented) { 819 for (ii=0; ii<bs; ii++,value+=stepval) { 820 for (jj=ii; jj<bs2; jj+=bs) { 821 bap[jj] = *value++; 822 } 823 } 824 } else { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=0; jj<bs; jj++) { 827 *bap++ = *value++; 828 } 829 } 830 } 831 noinsert2:; 832 low = i; 833 } 834 ailen[row] = nrow; 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 846 int m = A->m,*ip,N,*ailen = a->ilen; 847 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 848 MatScalar *aa = a->a,*ap; 849 850 PetscFunctionBegin; 851 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 852 853 if (m) rmax = ailen[0]; 854 for (i=1; i<mbs; i++) { 855 /* move each row back by the amount of empty slots (fshift) before it*/ 856 fshift += imax[i-1] - ailen[i-1]; 857 rmax = PetscMax(rmax,ailen[i]); 858 if (fshift) { 859 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 860 N = ailen[i]; 861 for (j=0; j<N; j++) { 862 ip[j-fshift] = ip[j]; 863 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 864 } 865 } 866 ai[i] = ai[i-1] + ailen[i-1]; 867 } 868 if (mbs) { 869 fshift += imax[mbs-1] - ailen[mbs-1]; 870 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 871 } 872 /* reset ilen and imax for each row */ 873 for (i=0; i<mbs; i++) { 874 ailen[i] = imax[i] = ai[i+1] - ai[i]; 875 } 876 a->nz = ai[mbs]; 877 878 /* diagonals may have moved, so kill the diagonal pointers */ 879 if (fshift && a->diag) { 880 ierr = PetscFree(a->diag);CHKERRQ(ierr); 881 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 882 a->diag = 0; 883 } 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 885 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 887 a->reallocs); 888 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 889 a->reallocs = 0; 890 A->info.nz_unneeded = (PetscReal)fshift*bs2; 891 892 PetscFunctionReturn(0); 893 } 894 895 896 897 /* 898 This function returns an array of flags which indicate the locations of contiguous 899 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 900 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 901 Assume: sizes should be long enough to hold all the values. 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 906 { 907 int i,j,k,row; 908 PetscTruth flg; 909 910 PetscFunctionBegin; 911 for (i=0,j=0; i<n; j++) { 912 row = idx[i]; 913 if (row%bs!=0) { /* Not the begining of a block */ 914 sizes[j] = 1; 915 i++; 916 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 917 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 918 i++; 919 } else { /* Begining of the block, so check if the complete block exists */ 920 flg = PETSC_TRUE; 921 for (k=1; k<bs; k++) { 922 if (row+k != idx[i+k]) { /* break in the block */ 923 flg = PETSC_FALSE; 924 break; 925 } 926 } 927 if (flg == PETSC_TRUE) { /* No break in the bs */ 928 sizes[j] = bs; 929 i+= bs; 930 } else { 931 sizes[j] = 1; 932 i++; 933 } 934 } 935 } 936 *bs_max = j; 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 943 { 944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 945 int ierr,i,j,k,count,is_n,*is_idx,*rows; 946 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 947 Scalar zero = 0.0; 948 MatScalar *aa; 949 950 PetscFunctionBegin; 951 /* Make a copy of the IS and sort it */ 952 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 953 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 954 955 /* allocate memory for rows,sizes */ 956 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 957 sizes = rows + is_n; 958 959 /* copy IS values to rows, and sort them */ 960 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 961 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 962 if (baij->keepzeroedrows) { 963 for (i=0; i<is_n; i++) { sizes[i] = 1; } 964 bs_max = is_n; 965 } else { 966 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 967 } 968 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 969 970 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 971 row = rows[j]; 972 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 973 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 974 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 975 if (sizes[i] == bs && !baij->keepzeroedrows) { 976 if (diag) { 977 if (baij->ilen[row/bs] > 0) { 978 baij->ilen[row/bs] = 1; 979 baij->j[baij->i[row/bs]] = row/bs; 980 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 /* Now insert all the diagonal values for this bs */ 983 for (k=0; k<bs; k++) { 984 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 985 } 986 } else { /* (!diag) */ 987 baij->ilen[row/bs] = 0; 988 } /* end (!diag) */ 989 } else { /* (sizes[i] != bs) */ 990 #if defined (PETSC_USE_DEBUG) 991 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 992 #endif 993 for (k=0; k<count; k++) { 994 aa[0] = zero; 995 aa += bs; 996 } 997 if (diag) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } 1001 } 1002 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1014 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1015 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1016 int ridx,cidx,bs2=a->bs2,ierr; 1017 PetscTruth roworiented=a->roworiented; 1018 MatScalar *ap,value,*aa=a->a,*bap; 1019 1020 PetscFunctionBegin; 1021 for (k=0; k<m; k++) { /* loop over added rows */ 1022 row = im[k]; brow = row/bs; 1023 if (row < 0) continue; 1024 #if defined(PETSC_USE_BOPT_g) 1025 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1026 #endif 1027 rp = aj + ai[brow]; 1028 ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow]; 1030 nrow = ailen[brow]; 1031 low = 0; 1032 for (l=0; l<n; l++) { /* loop over added columns */ 1033 if (in[l] < 0) continue; 1034 #if defined(PETSC_USE_BOPT_g) 1035 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1036 #endif 1037 col = in[l]; bcol = col/bs; 1038 ridx = row % bs; cidx = col % bs; 1039 if (roworiented) { 1040 value = v[l + k*n]; 1041 } else { 1042 value = v[k + l*m]; 1043 } 1044 if (!sorted) low = 0; high = nrow; 1045 while (high-low > 7) { 1046 t = (low+high)/2; 1047 if (rp[t] > bcol) high = t; 1048 else low = t; 1049 } 1050 for (i=low; i<high; i++) { 1051 if (rp[i] > bcol) break; 1052 if (rp[i] == bcol) { 1053 bap = ap + bs2*i + bs*cidx + ridx; 1054 if (is == ADD_VALUES) *bap += value; 1055 else *bap = value; 1056 goto noinsert1; 1057 } 1058 } 1059 if (nonew == 1) goto noinsert1; 1060 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 if (nrow >= rmax) { 1062 /* there is no extra room in row, therefore enlarge */ 1063 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1064 MatScalar *new_a; 1065 1066 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 1068 /* Malloc new storage space */ 1069 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1070 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1071 new_j = (int*)(new_a + bs2*new_nz); 1072 new_i = new_j + new_nz; 1073 1074 /* copy over old data into new slots */ 1075 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1076 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1077 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1078 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1079 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1083 /* free up old matrix storage */ 1084 ierr = PetscFree(a->a);CHKERRQ(ierr); 1085 if (!a->singlemalloc) { 1086 ierr = PetscFree(a->i);CHKERRQ(ierr); 1087 ierr = PetscFree(a->j);CHKERRQ(ierr); 1088 } 1089 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1090 a->singlemalloc = PETSC_TRUE; 1091 1092 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1093 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1094 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1095 a->maxnz += bs2*CHUNKSIZE; 1096 a->reallocs++; 1097 a->nz++; 1098 } 1099 N = nrow++ - 1; 1100 /* shift up all the later entries in this row */ 1101 for (ii=N; ii>=i; ii--) { 1102 rp[ii+1] = rp[ii]; 1103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 if (N>=i) { 1106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1107 } 1108 rp[i] = bcol; 1109 ap[bs2*i + bs*cidx + ridx] = value; 1110 noinsert1:; 1111 low = i; 1112 } 1113 ailen[brow] = nrow; 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*); 1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*); 1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat); 1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat); 1132 1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1148 1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1156 1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1165 1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1178 { 1179 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1180 Mat outA; 1181 int ierr; 1182 PetscTruth row_identity,col_identity; 1183 1184 PetscFunctionBegin; 1185 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1186 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1187 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1188 if (!row_identity || !col_identity) { 1189 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1190 } 1191 1192 outA = inA; 1193 inA->factor = FACTOR_LU; 1194 1195 if (!a->diag) { 1196 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1197 } 1198 /* 1199 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1200 for ILU(0) factorization with natural ordering 1201 */ 1202 switch (a->bs) { 1203 case 1: 1204 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1205 inA->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 1206 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 1207 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1208 case 2: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1211 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1212 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1213 break; 1214 case 3: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1217 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1218 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1219 break; 1220 case 4: 1221 #if defined(PETSC_HAVE_SSE) 1222 PetscTruth sse_enabled; 1223 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 1224 if (sse_enabled) { 1225 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 1226 } else { 1227 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1228 } 1229 #else 1230 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1231 #endif 1232 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1233 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1234 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1235 break; 1236 case 5: 1237 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1238 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1239 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1240 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1241 break; 1242 case 6: 1243 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1244 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1245 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1246 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1247 break; 1248 case 7: 1249 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1250 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1251 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1252 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1253 break; 1254 default: 1255 a->row = row; 1256 a->col = col; 1257 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1258 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1259 1260 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1261 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1262 PetscLogObjectParent(inA,a->icol); 1263 1264 if (!a->solve_work) { 1265 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1266 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1267 } 1268 } 1269 1270 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1271 1272 PetscFunctionReturn(0); 1273 } 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1276 int MatPrintHelp_SeqBAIJ(Mat A) 1277 { 1278 static PetscTruth called = PETSC_FALSE; 1279 MPI_Comm comm = A->comm; 1280 int ierr; 1281 1282 PetscFunctionBegin; 1283 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1284 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1285 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 EXTERN_C_BEGIN 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1292 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1293 { 1294 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1295 int i,nz,n; 1296 1297 PetscFunctionBegin; 1298 nz = baij->maxnz; 1299 n = mat->n; 1300 for (i=0; i<nz; i++) { 1301 baij->j[i] = indices[i]; 1302 } 1303 baij->nz = nz; 1304 for (i=0; i<n; i++) { 1305 baij->ilen[i] = baij->imax[i]; 1306 } 1307 1308 PetscFunctionReturn(0); 1309 } 1310 EXTERN_C_END 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1314 /*@ 1315 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1316 in the matrix. 1317 1318 Input Parameters: 1319 + mat - the SeqBAIJ matrix 1320 - indices - the column indices 1321 1322 Level: advanced 1323 1324 Notes: 1325 This can be called if you have precomputed the nonzero structure of the 1326 matrix and want to provide it to the matrix object to improve the performance 1327 of the MatSetValues() operation. 1328 1329 You MUST have set the correct numbers of nonzeros per row in the call to 1330 MatCreateSeqBAIJ(). 1331 1332 MUST be called before any calls to MatSetValues(); 1333 1334 @*/ 1335 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1336 { 1337 int ierr,(*f)(Mat,int *); 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1341 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1342 if (f) { 1343 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1344 } else { 1345 SETERRQ(1,"Wrong type of matrix to set column indices"); 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1352 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1353 { 1354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1355 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1356 PetscReal atmp; 1357 Scalar *x,zero = 0.0; 1358 MatScalar *aa; 1359 int ncols,brow,krow,kcol; 1360 1361 PetscFunctionBegin; 1362 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1363 bs = a->bs; 1364 aa = a->a; 1365 ai = a->i; 1366 aj = a->j; 1367 mbs = a->mbs; 1368 1369 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1370 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1371 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1372 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1373 for (i=0; i<mbs; i++) { 1374 ncols = ai[1] - ai[0]; ai++; 1375 brow = bs*i; 1376 for (j=0; j<ncols; j++){ 1377 /* bcol = bs*(*aj); */ 1378 for (kcol=0; kcol<bs; kcol++){ 1379 for (krow=0; krow<bs; krow++){ 1380 atmp = PetscAbsScalar(*aa); aa++; 1381 row = brow + krow; /* row index */ 1382 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1383 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1384 } 1385 } 1386 aj++; 1387 } 1388 } 1389 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1395 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1396 { 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1406 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1409 PetscFunctionBegin; 1410 *array = a->a; 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1416 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1417 { 1418 PetscFunctionBegin; 1419 PetscFunctionReturn(0); 1420 } 1421 1422 /* -------------------------------------------------------------------*/ 1423 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1424 MatGetRow_SeqBAIJ, 1425 MatRestoreRow_SeqBAIJ, 1426 MatMult_SeqBAIJ_N, 1427 MatMultAdd_SeqBAIJ_N, 1428 MatMultTranspose_SeqBAIJ, 1429 MatMultTransposeAdd_SeqBAIJ, 1430 MatSolve_SeqBAIJ_N, 1431 0, 1432 0, 1433 0, 1434 MatLUFactor_SeqBAIJ, 1435 0, 1436 0, 1437 MatTranspose_SeqBAIJ, 1438 MatGetInfo_SeqBAIJ, 1439 MatEqual_SeqBAIJ, 1440 MatGetDiagonal_SeqBAIJ, 1441 MatDiagonalScale_SeqBAIJ, 1442 MatNorm_SeqBAIJ, 1443 0, 1444 MatAssemblyEnd_SeqBAIJ, 1445 0, 1446 MatSetOption_SeqBAIJ, 1447 MatZeroEntries_SeqBAIJ, 1448 MatZeroRows_SeqBAIJ, 1449 MatLUFactorSymbolic_SeqBAIJ, 1450 MatLUFactorNumeric_SeqBAIJ_N, 1451 0, 1452 0, 1453 MatSetUpPreallocation_SeqBAIJ, 1454 0, 1455 MatGetOwnershipRange_SeqBAIJ, 1456 MatILUFactorSymbolic_SeqBAIJ, 1457 0, 1458 MatGetArray_SeqBAIJ, 1459 MatRestoreArray_SeqBAIJ, 1460 MatDuplicate_SeqBAIJ, 1461 0, 1462 0, 1463 MatILUFactor_SeqBAIJ, 1464 0, 1465 0, 1466 MatGetSubMatrices_SeqBAIJ, 1467 MatIncreaseOverlap_SeqBAIJ, 1468 MatGetValues_SeqBAIJ, 1469 0, 1470 MatPrintHelp_SeqBAIJ, 1471 MatScale_SeqBAIJ, 1472 0, 1473 0, 1474 0, 1475 MatGetBlockSize_SeqBAIJ, 1476 MatGetRowIJ_SeqBAIJ, 1477 MatRestoreRowIJ_SeqBAIJ, 1478 0, 1479 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 MatSetValuesBlocked_SeqBAIJ, 1485 MatGetSubMatrix_SeqBAIJ, 1486 MatDestroy_SeqBAIJ, 1487 MatView_SeqBAIJ, 1488 MatGetMaps_Petsc, 1489 0, 1490 0, 1491 0, 1492 0, 1493 0, 1494 0, 1495 MatGetRowMax_SeqBAIJ, 1496 MatConvert_Basic}; 1497 1498 EXTERN_C_BEGIN 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1501 int MatStoreValues_SeqBAIJ(Mat mat) 1502 { 1503 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1504 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1505 int ierr; 1506 1507 PetscFunctionBegin; 1508 if (aij->nonew != 1) { 1509 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1510 } 1511 1512 /* allocate space for values if not already there */ 1513 if (!aij->saved_values) { 1514 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1515 } 1516 1517 /* copy values over */ 1518 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 EXTERN_C_END 1522 1523 EXTERN_C_BEGIN 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1526 int MatRetrieveValues_SeqBAIJ(Mat mat) 1527 { 1528 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1529 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1530 1531 PetscFunctionBegin; 1532 if (aij->nonew != 1) { 1533 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1534 } 1535 if (!aij->saved_values) { 1536 SETERRQ(1,"Must call MatStoreValues(A);first"); 1537 } 1538 1539 /* copy values over */ 1540 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 EXTERN_C_END 1544 1545 EXTERN_C_BEGIN 1546 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1547 EXTERN_C_END 1548 1549 EXTERN_C_BEGIN 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatCreate_SeqBAIJ" 1552 int MatCreate_SeqBAIJ(Mat B) 1553 { 1554 int ierr,size; 1555 Mat_SeqBAIJ *b; 1556 1557 PetscFunctionBegin; 1558 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1559 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1560 1561 B->m = B->M = PetscMax(B->m,B->M); 1562 B->n = B->N = PetscMax(B->n,B->N); 1563 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1564 B->data = (void*)b; 1565 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1566 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1567 B->factor = 0; 1568 B->lupivotthreshold = 1.0; 1569 B->mapping = 0; 1570 b->row = 0; 1571 b->col = 0; 1572 b->icol = 0; 1573 b->reallocs = 0; 1574 b->saved_values = 0; 1575 #if defined(PETSC_USE_MAT_SINGLE) 1576 b->setvalueslen = 0; 1577 b->setvaluescopy = PETSC_NULL; 1578 #endif 1579 1580 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1581 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1582 1583 b->sorted = PETSC_FALSE; 1584 b->roworiented = PETSC_TRUE; 1585 b->nonew = 0; 1586 b->diag = 0; 1587 b->solve_work = 0; 1588 b->mult_work = 0; 1589 b->spptr = 0; 1590 B->info.nz_unneeded = (PetscReal)b->maxnz; 1591 b->keepzeroedrows = PETSC_FALSE; 1592 1593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1594 "MatStoreValues_SeqBAIJ", 1595 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1597 "MatRetrieveValues_SeqBAIJ", 1598 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1600 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1601 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1603 "MatConvert_SeqBAIJ_SeqAIJ", 1604 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 EXTERN_C_END 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1611 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1612 { 1613 Mat C; 1614 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1615 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1616 1617 PetscFunctionBegin; 1618 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1619 1620 *B = 0; 1621 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1622 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1623 c = (Mat_SeqBAIJ*)C->data; 1624 1625 c->bs = a->bs; 1626 c->bs2 = a->bs2; 1627 c->mbs = a->mbs; 1628 c->nbs = a->nbs; 1629 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1630 1631 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1632 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1633 for (i=0; i<mbs; i++) { 1634 c->imax[i] = a->imax[i]; 1635 c->ilen[i] = a->ilen[i]; 1636 } 1637 1638 /* allocate the matrix space */ 1639 c->singlemalloc = PETSC_TRUE; 1640 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1641 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1642 c->j = (int*)(c->a + nz*bs2); 1643 c->i = c->j + nz; 1644 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1645 if (mbs > 0) { 1646 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1647 if (cpvalues == MAT_COPY_VALUES) { 1648 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1649 } else { 1650 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1651 } 1652 } 1653 1654 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1655 c->sorted = a->sorted; 1656 c->roworiented = a->roworiented; 1657 c->nonew = a->nonew; 1658 1659 if (a->diag) { 1660 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1661 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1662 for (i=0; i<mbs; i++) { 1663 c->diag[i] = a->diag[i]; 1664 } 1665 } else c->diag = 0; 1666 c->nz = a->nz; 1667 c->maxnz = a->maxnz; 1668 c->solve_work = 0; 1669 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1670 c->mult_work = 0; 1671 C->preallocated = PETSC_TRUE; 1672 C->assembled = PETSC_TRUE; 1673 *B = C; 1674 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 EXTERN_C_BEGIN 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatLoad_SeqBAIJ" 1681 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1682 { 1683 Mat_SeqBAIJ *a; 1684 Mat B; 1685 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1686 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1687 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1688 int *masked,nmask,tmp,bs2,ishift; 1689 Scalar *aa; 1690 MPI_Comm comm = ((PetscObject)viewer)->comm; 1691 1692 PetscFunctionBegin; 1693 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1694 bs2 = bs*bs; 1695 1696 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1697 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1698 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1699 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1700 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1701 M = header[1]; N = header[2]; nz = header[3]; 1702 1703 if (header[3] < 0) { 1704 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1705 } 1706 1707 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1708 1709 /* 1710 This code adds extra rows to make sure the number of rows is 1711 divisible by the blocksize 1712 */ 1713 mbs = M/bs; 1714 extra_rows = bs - M + bs*(mbs); 1715 if (extra_rows == bs) extra_rows = 0; 1716 else mbs++; 1717 if (extra_rows) { 1718 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1719 } 1720 1721 /* read in row lengths */ 1722 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1723 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1724 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1725 1726 /* read in column indices */ 1727 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1728 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1729 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1730 1731 /* loop over row lengths determining block row lengths */ 1732 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1733 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1734 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1735 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1736 masked = mask + mbs; 1737 rowcount = 0; nzcount = 0; 1738 for (i=0; i<mbs; i++) { 1739 nmask = 0; 1740 for (j=0; j<bs; j++) { 1741 kmax = rowlengths[rowcount]; 1742 for (k=0; k<kmax; k++) { 1743 tmp = jj[nzcount++]/bs; 1744 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1745 } 1746 rowcount++; 1747 } 1748 browlengths[i] += nmask; 1749 /* zero out the mask elements we set */ 1750 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1751 } 1752 1753 /* create our matrix */ 1754 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1755 B = *A; 1756 a = (Mat_SeqBAIJ*)B->data; 1757 1758 /* set matrix "i" values */ 1759 a->i[0] = 0; 1760 for (i=1; i<= mbs; i++) { 1761 a->i[i] = a->i[i-1] + browlengths[i-1]; 1762 a->ilen[i-1] = browlengths[i-1]; 1763 } 1764 a->nz = 0; 1765 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1766 1767 /* read in nonzero values */ 1768 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1769 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1770 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1771 1772 /* set "a" and "j" values into matrix */ 1773 nzcount = 0; jcount = 0; 1774 for (i=0; i<mbs; i++) { 1775 nzcountb = nzcount; 1776 nmask = 0; 1777 for (j=0; j<bs; j++) { 1778 kmax = rowlengths[i*bs+j]; 1779 for (k=0; k<kmax; k++) { 1780 tmp = jj[nzcount++]/bs; 1781 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1782 } 1783 } 1784 /* sort the masked values */ 1785 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1786 1787 /* set "j" values into matrix */ 1788 maskcount = 1; 1789 for (j=0; j<nmask; j++) { 1790 a->j[jcount++] = masked[j]; 1791 mask[masked[j]] = maskcount++; 1792 } 1793 /* set "a" values into matrix */ 1794 ishift = bs2*a->i[i]; 1795 for (j=0; j<bs; j++) { 1796 kmax = rowlengths[i*bs+j]; 1797 for (k=0; k<kmax; k++) { 1798 tmp = jj[nzcountb]/bs ; 1799 block = mask[tmp] - 1; 1800 point = jj[nzcountb] - bs*tmp; 1801 idx = ishift + bs2*block + j + bs*point; 1802 a->a[idx] = (MatScalar)aa[nzcountb++]; 1803 } 1804 } 1805 /* zero out the mask elements we set */ 1806 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1807 } 1808 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1809 1810 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1811 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1812 ierr = PetscFree(aa);CHKERRQ(ierr); 1813 ierr = PetscFree(jj);CHKERRQ(ierr); 1814 ierr = PetscFree(mask);CHKERRQ(ierr); 1815 1816 B->assembled = PETSC_TRUE; 1817 1818 ierr = MatView_Private(B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatCreateSeqBAIJ" 1825 /*@C 1826 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1827 compressed row) format. For good matrix assembly performance the 1828 user should preallocate the matrix storage by setting the parameter nz 1829 (or the array nnz). By setting these parameters accurately, performance 1830 during matrix assembly can be increased by more than a factor of 50. 1831 1832 Collective on MPI_Comm 1833 1834 Input Parameters: 1835 + comm - MPI communicator, set to PETSC_COMM_SELF 1836 . bs - size of block 1837 . m - number of rows 1838 . n - number of columns 1839 . nz - number of nonzero blocks per block row (same for all rows) 1840 - nnz - array containing the number of nonzero blocks in the various block rows 1841 (possibly different for each block row) or PETSC_NULL 1842 1843 Output Parameter: 1844 . A - the matrix 1845 1846 Options Database Keys: 1847 . -mat_no_unroll - uses code that does not unroll the loops in the 1848 block calculations (much slower) 1849 . -mat_block_size - size of the blocks to use 1850 1851 Level: intermediate 1852 1853 Notes: 1854 A nonzero block is any block that as 1 or more nonzeros in it 1855 1856 The block AIJ format is fully compatible with standard Fortran 77 1857 storage. That is, the stored row and column indices can begin at 1858 either one (as in Fortran) or zero. See the users' manual for details. 1859 1860 Specify the preallocated storage with either nz or nnz (not both). 1861 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1862 allocation. For additional details, see the users manual chapter on 1863 matrices. 1864 1865 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1866 @*/ 1867 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1868 { 1869 int ierr; 1870 1871 PetscFunctionBegin; 1872 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1873 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1874 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1880 /*@C 1881 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1882 per row in the matrix. For good matrix assembly performance the 1883 user should preallocate the matrix storage by setting the parameter nz 1884 (or the array nnz). By setting these parameters accurately, performance 1885 during matrix assembly can be increased by more than a factor of 50. 1886 1887 Collective on MPI_Comm 1888 1889 Input Parameters: 1890 + A - the matrix 1891 . bs - size of block 1892 . nz - number of block nonzeros per block row (same for all rows) 1893 - nnz - array containing the number of block nonzeros in the various block rows 1894 (possibly different for each block row) or PETSC_NULL 1895 1896 Options Database Keys: 1897 . -mat_no_unroll - uses code that does not unroll the loops in the 1898 block calculations (much slower) 1899 . -mat_block_size - size of the blocks to use 1900 1901 Level: intermediate 1902 1903 Notes: 1904 The block AIJ format is fully compatible with standard Fortran 77 1905 storage. That is, the stored row and column indices can begin at 1906 either one (as in Fortran) or zero. See the users' manual for details. 1907 1908 Specify the preallocated storage with either nz or nnz (not both). 1909 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1910 allocation. For additional details, see the users manual chapter on 1911 matrices. 1912 1913 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1914 @*/ 1915 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1916 { 1917 Mat_SeqBAIJ *b; 1918 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1919 PetscTruth flg; 1920 1921 PetscFunctionBegin; 1922 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1923 if (!flg) PetscFunctionReturn(0); 1924 1925 B->preallocated = PETSC_TRUE; 1926 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1927 if (nnz && newbs != bs) { 1928 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1929 } 1930 bs = newbs; 1931 1932 mbs = B->m/bs; 1933 nbs = B->n/bs; 1934 bs2 = bs*bs; 1935 1936 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1937 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1938 } 1939 1940 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1941 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1942 if (nnz) { 1943 for (i=0; i<mbs; i++) { 1944 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1945 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1946 } 1947 } 1948 1949 b = (Mat_SeqBAIJ*)B->data; 1950 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1951 if (!flg) { 1952 switch (bs) { 1953 case 1: 1954 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1955 B->ops->solve = MatSolve_SeqBAIJ_1; 1956 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1957 B->ops->mult = MatMult_SeqBAIJ_1; 1958 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1959 break; 1960 case 2: 1961 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1962 B->ops->solve = MatSolve_SeqBAIJ_2; 1963 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1964 B->ops->mult = MatMult_SeqBAIJ_2; 1965 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1966 break; 1967 case 3: 1968 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1969 B->ops->solve = MatSolve_SeqBAIJ_3; 1970 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1971 B->ops->mult = MatMult_SeqBAIJ_3; 1972 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1973 break; 1974 case 4: 1975 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1976 B->ops->solve = MatSolve_SeqBAIJ_4; 1977 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1978 B->ops->mult = MatMult_SeqBAIJ_4; 1979 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1980 break; 1981 case 5: 1982 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1983 B->ops->solve = MatSolve_SeqBAIJ_5; 1984 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1985 B->ops->mult = MatMult_SeqBAIJ_5; 1986 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1987 break; 1988 case 6: 1989 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1990 B->ops->solve = MatSolve_SeqBAIJ_6; 1991 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1992 B->ops->mult = MatMult_SeqBAIJ_6; 1993 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1994 break; 1995 case 7: 1996 B->ops->mult = MatMult_SeqBAIJ_7; 1997 B->ops->solve = MatSolve_SeqBAIJ_7; 1998 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 1999 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2000 break; 2001 default: 2002 B->ops->mult = MatMult_SeqBAIJ_N; 2003 B->ops->solve = MatSolve_SeqBAIJ_N; 2004 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2005 break; 2006 } 2007 } 2008 b->bs = bs; 2009 b->mbs = mbs; 2010 b->nbs = nbs; 2011 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2012 if (!nnz) { 2013 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2014 else if (nz <= 0) nz = 1; 2015 for (i=0; i<mbs; i++) b->imax[i] = nz; 2016 nz = nz*mbs; 2017 } else { 2018 nz = 0; 2019 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2020 } 2021 2022 /* allocate the matrix space */ 2023 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2024 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2025 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2026 b->j = (int*)(b->a + nz*bs2); 2027 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2028 b->i = b->j + nz; 2029 b->singlemalloc = PETSC_TRUE; 2030 2031 b->i[0] = 0; 2032 for (i=1; i<mbs+1; i++) { 2033 b->i[i] = b->i[i-1] + b->imax[i-1]; 2034 } 2035 2036 /* b->ilen will count nonzeros in each block row so far. */ 2037 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2038 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2039 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2040 2041 b->bs = bs; 2042 b->bs2 = bs2; 2043 b->mbs = mbs; 2044 b->nz = 0; 2045 b->maxnz = nz*bs2; 2046 B->info.nz_unneeded = (PetscReal)b->maxnz; 2047 PetscFunctionReturn(0); 2048 } 2049 2050