1 /*$Id: baij.c,v 1.230 2001/06/22 19:05:58 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 #if defined(PETSC_USE_MAT_SINGLE) 216 if (a->bs==4) { 217 PetscTruth sse_enabled; 218 int ierr; 219 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 220 if (sse_enabled) { 221 a->singleprecisionsolves = PETSC_TRUE; 222 } else { 223 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 224 } 225 } else { 226 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 227 } 228 #else 229 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 230 #endif 231 break; 232 default: 233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 241 { 242 PetscFunctionBegin; 243 if (m) *m = 0; 244 if (n) *n = A->m; 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatGetRow_SeqBAIJ" 250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 251 { 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 254 MatScalar *aa,*aa_i; 255 Scalar *v_i; 256 257 PetscFunctionBegin; 258 bs = a->bs; 259 ai = a->i; 260 aj = a->j; 261 aa = a->a; 262 bs2 = a->bs2; 263 264 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 265 266 bn = row/bs; /* Block number */ 267 bp = row % bs; /* Block Position */ 268 M = ai[bn+1] - ai[bn]; 269 *nz = bs*M; 270 271 if (v) { 272 *v = 0; 273 if (*nz) { 274 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 275 for (i=0; i<M; i++) { /* for each block in the block row */ 276 v_i = *v + i*bs; 277 aa_i = aa + bs2*(ai[bn] + i); 278 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 279 } 280 } 281 } 282 283 if (idx) { 284 *idx = 0; 285 if (*nz) { 286 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 287 for (i=0; i<M; i++) { /* for each block in the block row */ 288 idx_i = *idx + i*bs; 289 itmp = bs*aj[ai[bn] + i]; 290 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 291 } 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 300 { 301 int ierr; 302 303 PetscFunctionBegin; 304 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 305 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatTranspose_SeqBAIJ" 311 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 312 { 313 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 314 Mat C; 315 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 316 int *rows,*cols,bs2=a->bs2; 317 Scalar *array; 318 319 PetscFunctionBegin; 320 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 321 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 322 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 323 324 #if defined(PETSC_USE_MAT_SINGLE) 325 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 326 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 327 #else 328 array = a->a; 329 #endif 330 331 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 332 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 333 ierr = PetscFree(col);CHKERRQ(ierr); 334 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 335 cols = rows + bs; 336 for (i=0; i<mbs; i++) { 337 cols[0] = i*bs; 338 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 339 len = ai[i+1] - ai[i]; 340 for (j=0; j<len; j++) { 341 rows[0] = (*aj++)*bs; 342 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 343 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 344 array += bs2; 345 } 346 } 347 ierr = PetscFree(rows);CHKERRQ(ierr); 348 #if defined(PETSC_USE_MAT_SINGLE) 349 ierr = PetscFree(array); 350 #endif 351 352 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 355 if (B) { 356 *B = C; 357 } else { 358 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = A->m; 378 col_lens[2] = A->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 443 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 PetscViewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = PETSC_DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = PETSC_DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = PETSC_DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 569 { 570 int ierr; 571 PetscReal xl,yl,xr,yr,w,h; 572 PetscDraw draw; 573 PetscTruth isnull; 574 575 PetscFunctionBegin; 576 577 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 578 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 579 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 581 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 582 xr += w; yr += h; xl = -w; yl = -h; 583 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 584 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatView_SeqBAIJ" 591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 592 { 593 int ierr; 594 PetscTruth issocket,isascii,isbinary,isdraw; 595 596 PetscFunctionBegin; 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 if (issocket) { 602 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 603 } else if (isascii){ 604 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 605 } else if (isbinary) { 606 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 607 } else if (isdraw) { 608 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 609 } else { 610 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_SeqBAIJ" 618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 619 { 620 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 621 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 622 int *ai = a->i,*ailen = a->ilen; 623 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 624 MatScalar *ap,*aa = a->a,zero = 0.0; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over rows */ 628 row = im[k]; brow = row/bs; 629 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 630 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 631 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 632 nrow = ailen[brow]; 633 for (l=0; l<n; l++) { /* loop over columns */ 634 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 635 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 636 col = in[l] ; 637 bcol = col/bs; 638 cidx = col%bs; 639 ridx = row%bs; 640 high = nrow; 641 low = 0; /* assume unsorted */ 642 while (high-low > 5) { 643 t = (low+high)/2; 644 if (rp[t] > bcol) high = t; 645 else low = t; 646 } 647 for (i=low; i<high; i++) { 648 if (rp[i] > bcol) break; 649 if (rp[i] == bcol) { 650 *v++ = ap[bs2*i+bs*cidx+ridx]; 651 goto finished; 652 } 653 } 654 *v++ = zero; 655 finished:; 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #if defined(PETSC_USE_MAT_SINGLE) 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 665 { 666 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 667 int ierr,i,N = m*n*b->bs2; 668 MatScalar *vsingle; 669 670 PetscFunctionBegin; 671 if (N > b->setvalueslen) { 672 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 673 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 674 b->setvalueslen = N; 675 } 676 vsingle = b->setvaluescopy; 677 for (i=0; i<N; i++) { 678 vsingle[i] = v[i]; 679 } 680 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 #endif 684 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 692 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 693 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 694 PetscTruth roworiented=a->roworiented; 695 MatScalar *value = v,*ap,*aa = a->a,*bap; 696 697 PetscFunctionBegin; 698 if (roworiented) { 699 stepval = (n-1)*bs; 700 } else { 701 stepval = (m-1)*bs; 702 } 703 for (k=0; k<m; k++) { /* loop over added rows */ 704 row = im[k]; 705 if (row < 0) continue; 706 #if defined(PETSC_USE_BOPT_g) 707 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 708 #endif 709 rp = aj + ai[row]; 710 ap = aa + bs2*ai[row]; 711 rmax = imax[row]; 712 nrow = ailen[row]; 713 low = 0; 714 for (l=0; l<n; l++) { /* loop over added columns */ 715 if (in[l] < 0) continue; 716 #if defined(PETSC_USE_BOPT_g) 717 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 718 #endif 719 col = in[l]; 720 if (roworiented) { 721 value = v + k*(stepval+bs)*bs + l*bs; 722 } else { 723 value = v + l*(stepval+bs)*bs + k*bs; 724 } 725 if (!sorted) low = 0; high = nrow; 726 while (high-low > 7) { 727 t = (low+high)/2; 728 if (rp[t] > col) high = t; 729 else low = t; 730 } 731 for (i=low; i<high; i++) { 732 if (rp[i] > col) break; 733 if (rp[i] == col) { 734 bap = ap + bs2*i; 735 if (roworiented) { 736 if (is == ADD_VALUES) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] += *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] = *value++; 746 } 747 } 748 } 749 } else { 750 if (is == ADD_VALUES) { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=0; jj<bs; jj++) { 753 *bap++ += *value++; 754 } 755 } 756 } else { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ = *value++; 760 } 761 } 762 } 763 } 764 goto noinsert2; 765 } 766 } 767 if (nonew == 1) goto noinsert2; 768 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 769 if (nrow >= rmax) { 770 /* there is no extra room in row, therefore enlarge */ 771 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 772 MatScalar *new_a; 773 774 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 776 /* malloc new storage space */ 777 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 778 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 779 new_j = (int*)(new_a + bs2*new_nz); 780 new_i = new_j + new_nz; 781 782 /* copy over old data into new slots */ 783 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 784 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 785 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 786 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 787 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 790 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 791 /* free up old matrix storage */ 792 ierr = PetscFree(a->a);CHKERRQ(ierr); 793 if (!a->singlemalloc) { 794 ierr = PetscFree(a->i);CHKERRQ(ierr); 795 ierr = PetscFree(a->j);CHKERRQ(ierr); 796 } 797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 798 a->singlemalloc = PETSC_TRUE; 799 800 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 801 rmax = imax[row] = imax[row] + CHUNKSIZE; 802 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 803 a->maxnz += bs2*CHUNKSIZE; 804 a->reallocs++; 805 a->nz++; 806 } 807 N = nrow++ - 1; 808 /* shift up all the later entries in this row */ 809 for (ii=N; ii>=i; ii--) { 810 rp[ii+1] = rp[ii]; 811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 if (N >= i) { 814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 815 } 816 rp[i] = col; 817 bap = ap + bs2*i; 818 if (roworiented) { 819 for (ii=0; ii<bs; ii++,value+=stepval) { 820 for (jj=ii; jj<bs2; jj+=bs) { 821 bap[jj] = *value++; 822 } 823 } 824 } else { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=0; jj<bs; jj++) { 827 *bap++ = *value++; 828 } 829 } 830 } 831 noinsert2:; 832 low = i; 833 } 834 ailen[row] = nrow; 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 846 int m = A->m,*ip,N,*ailen = a->ilen; 847 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 848 MatScalar *aa = a->a,*ap; 849 850 PetscFunctionBegin; 851 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 852 853 if (m) rmax = ailen[0]; 854 for (i=1; i<mbs; i++) { 855 /* move each row back by the amount of empty slots (fshift) before it*/ 856 fshift += imax[i-1] - ailen[i-1]; 857 rmax = PetscMax(rmax,ailen[i]); 858 if (fshift) { 859 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 860 N = ailen[i]; 861 for (j=0; j<N; j++) { 862 ip[j-fshift] = ip[j]; 863 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 864 } 865 } 866 ai[i] = ai[i-1] + ailen[i-1]; 867 } 868 if (mbs) { 869 fshift += imax[mbs-1] - ailen[mbs-1]; 870 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 871 } 872 /* reset ilen and imax for each row */ 873 for (i=0; i<mbs; i++) { 874 ailen[i] = imax[i] = ai[i+1] - ai[i]; 875 } 876 a->nz = ai[mbs]; 877 878 /* diagonals may have moved, so kill the diagonal pointers */ 879 if (fshift && a->diag) { 880 ierr = PetscFree(a->diag);CHKERRQ(ierr); 881 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 882 a->diag = 0; 883 } 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 885 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 887 a->reallocs); 888 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 889 a->reallocs = 0; 890 A->info.nz_unneeded = (PetscReal)fshift*bs2; 891 892 PetscFunctionReturn(0); 893 } 894 895 896 897 /* 898 This function returns an array of flags which indicate the locations of contiguous 899 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 900 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 901 Assume: sizes should be long enough to hold all the values. 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 906 { 907 int i,j,k,row; 908 PetscTruth flg; 909 910 PetscFunctionBegin; 911 for (i=0,j=0; i<n; j++) { 912 row = idx[i]; 913 if (row%bs!=0) { /* Not the begining of a block */ 914 sizes[j] = 1; 915 i++; 916 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 917 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 918 i++; 919 } else { /* Begining of the block, so check if the complete block exists */ 920 flg = PETSC_TRUE; 921 for (k=1; k<bs; k++) { 922 if (row+k != idx[i+k]) { /* break in the block */ 923 flg = PETSC_FALSE; 924 break; 925 } 926 } 927 if (flg == PETSC_TRUE) { /* No break in the bs */ 928 sizes[j] = bs; 929 i+= bs; 930 } else { 931 sizes[j] = 1; 932 i++; 933 } 934 } 935 } 936 *bs_max = j; 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 943 { 944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 945 int ierr,i,j,k,count,is_n,*is_idx,*rows; 946 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 947 Scalar zero = 0.0; 948 MatScalar *aa; 949 950 PetscFunctionBegin; 951 /* Make a copy of the IS and sort it */ 952 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 953 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 954 955 /* allocate memory for rows,sizes */ 956 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 957 sizes = rows + is_n; 958 959 /* copy IS values to rows, and sort them */ 960 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 961 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 962 if (baij->keepzeroedrows) { 963 for (i=0; i<is_n; i++) { sizes[i] = 1; } 964 bs_max = is_n; 965 } else { 966 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 967 } 968 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 969 970 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 971 row = rows[j]; 972 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 973 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 974 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 975 if (sizes[i] == bs && !baij->keepzeroedrows) { 976 if (diag) { 977 if (baij->ilen[row/bs] > 0) { 978 baij->ilen[row/bs] = 1; 979 baij->j[baij->i[row/bs]] = row/bs; 980 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 /* Now insert all the diagonal values for this bs */ 983 for (k=0; k<bs; k++) { 984 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 985 } 986 } else { /* (!diag) */ 987 baij->ilen[row/bs] = 0; 988 } /* end (!diag) */ 989 } else { /* (sizes[i] != bs) */ 990 #if defined (PETSC_USE_DEBUG) 991 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 992 #endif 993 for (k=0; k<count; k++) { 994 aa[0] = zero; 995 aa += bs; 996 } 997 if (diag) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } 1001 } 1002 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1014 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1015 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1016 int ridx,cidx,bs2=a->bs2,ierr; 1017 PetscTruth roworiented=a->roworiented; 1018 MatScalar *ap,value,*aa=a->a,*bap; 1019 1020 PetscFunctionBegin; 1021 for (k=0; k<m; k++) { /* loop over added rows */ 1022 row = im[k]; brow = row/bs; 1023 if (row < 0) continue; 1024 #if defined(PETSC_USE_BOPT_g) 1025 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1026 #endif 1027 rp = aj + ai[brow]; 1028 ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow]; 1030 nrow = ailen[brow]; 1031 low = 0; 1032 for (l=0; l<n; l++) { /* loop over added columns */ 1033 if (in[l] < 0) continue; 1034 #if defined(PETSC_USE_BOPT_g) 1035 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1036 #endif 1037 col = in[l]; bcol = col/bs; 1038 ridx = row % bs; cidx = col % bs; 1039 if (roworiented) { 1040 value = v[l + k*n]; 1041 } else { 1042 value = v[k + l*m]; 1043 } 1044 if (!sorted) low = 0; high = nrow; 1045 while (high-low > 7) { 1046 t = (low+high)/2; 1047 if (rp[t] > bcol) high = t; 1048 else low = t; 1049 } 1050 for (i=low; i<high; i++) { 1051 if (rp[i] > bcol) break; 1052 if (rp[i] == bcol) { 1053 bap = ap + bs2*i + bs*cidx + ridx; 1054 if (is == ADD_VALUES) *bap += value; 1055 else *bap = value; 1056 goto noinsert1; 1057 } 1058 } 1059 if (nonew == 1) goto noinsert1; 1060 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 if (nrow >= rmax) { 1062 /* there is no extra room in row, therefore enlarge */ 1063 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1064 MatScalar *new_a; 1065 1066 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 1068 /* Malloc new storage space */ 1069 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1070 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1071 new_j = (int*)(new_a + bs2*new_nz); 1072 new_i = new_j + new_nz; 1073 1074 /* copy over old data into new slots */ 1075 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1076 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1077 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1078 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1079 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1083 /* free up old matrix storage */ 1084 ierr = PetscFree(a->a);CHKERRQ(ierr); 1085 if (!a->singlemalloc) { 1086 ierr = PetscFree(a->i);CHKERRQ(ierr); 1087 ierr = PetscFree(a->j);CHKERRQ(ierr); 1088 } 1089 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1090 a->singlemalloc = PETSC_TRUE; 1091 1092 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1093 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1094 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1095 a->maxnz += bs2*CHUNKSIZE; 1096 a->reallocs++; 1097 a->nz++; 1098 } 1099 N = nrow++ - 1; 1100 /* shift up all the later entries in this row */ 1101 for (ii=N; ii>=i; ii--) { 1102 rp[ii+1] = rp[ii]; 1103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 if (N>=i) { 1106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1107 } 1108 rp[i] = bcol; 1109 ap[bs2*i + bs*cidx + ridx] = value; 1110 noinsert1:; 1111 low = i; 1112 } 1113 ailen[brow] = nrow; 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*); 1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*); 1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat); 1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat); 1132 1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1148 1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1156 1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1165 1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1178 { 1179 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1180 Mat outA; 1181 int ierr; 1182 PetscTruth row_identity,col_identity; 1183 1184 PetscFunctionBegin; 1185 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1186 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1187 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1188 if (!row_identity || !col_identity) { 1189 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1190 } 1191 1192 outA = inA; 1193 inA->factor = FACTOR_LU; 1194 1195 if (!a->diag) { 1196 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1197 } 1198 /* 1199 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1200 for ILU(0) factorization with natural ordering 1201 */ 1202 switch (a->bs) { 1203 case 1: 1204 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1205 inA->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 1206 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 1207 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1208 case 2: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1211 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1212 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1213 break; 1214 case 3: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1217 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1218 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1219 break; 1220 case 4: 1221 #if defined(PETSC_HAVE_SSE) 1222 { 1223 PetscTruth sse_enabled; 1224 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 1225 if (sse_enabled) { 1226 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 1227 } else { 1228 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1229 } 1230 } 1231 #else 1232 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1233 #endif 1234 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1235 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1236 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1237 break; 1238 case 5: 1239 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1240 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1241 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1242 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1243 break; 1244 case 6: 1245 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1246 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1247 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1248 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1249 break; 1250 case 7: 1251 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1252 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1253 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1254 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1255 break; 1256 default: 1257 a->row = row; 1258 a->col = col; 1259 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1260 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1261 1262 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1263 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1264 PetscLogObjectParent(inA,a->icol); 1265 1266 if (!a->solve_work) { 1267 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1268 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1269 } 1270 } 1271 1272 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1273 1274 PetscFunctionReturn(0); 1275 } 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1278 int MatPrintHelp_SeqBAIJ(Mat A) 1279 { 1280 static PetscTruth called = PETSC_FALSE; 1281 MPI_Comm comm = A->comm; 1282 int ierr; 1283 1284 PetscFunctionBegin; 1285 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1286 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1287 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 EXTERN_C_BEGIN 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1294 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1295 { 1296 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1297 int i,nz,n; 1298 1299 PetscFunctionBegin; 1300 nz = baij->maxnz; 1301 n = mat->n; 1302 for (i=0; i<nz; i++) { 1303 baij->j[i] = indices[i]; 1304 } 1305 baij->nz = nz; 1306 for (i=0; i<n; i++) { 1307 baij->ilen[i] = baij->imax[i]; 1308 } 1309 1310 PetscFunctionReturn(0); 1311 } 1312 EXTERN_C_END 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1316 /*@ 1317 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1318 in the matrix. 1319 1320 Input Parameters: 1321 + mat - the SeqBAIJ matrix 1322 - indices - the column indices 1323 1324 Level: advanced 1325 1326 Notes: 1327 This can be called if you have precomputed the nonzero structure of the 1328 matrix and want to provide it to the matrix object to improve the performance 1329 of the MatSetValues() operation. 1330 1331 You MUST have set the correct numbers of nonzeros per row in the call to 1332 MatCreateSeqBAIJ(). 1333 1334 MUST be called before any calls to MatSetValues(); 1335 1336 @*/ 1337 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1338 { 1339 int ierr,(*f)(Mat,int *); 1340 1341 PetscFunctionBegin; 1342 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1343 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1344 if (f) { 1345 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1346 } else { 1347 SETERRQ(1,"Wrong type of matrix to set column indices"); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1354 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1355 { 1356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1357 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1358 PetscReal atmp; 1359 Scalar *x,zero = 0.0; 1360 MatScalar *aa; 1361 int ncols,brow,krow,kcol; 1362 1363 PetscFunctionBegin; 1364 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1365 bs = a->bs; 1366 aa = a->a; 1367 ai = a->i; 1368 aj = a->j; 1369 mbs = a->mbs; 1370 1371 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1372 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1373 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1374 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1375 for (i=0; i<mbs; i++) { 1376 ncols = ai[1] - ai[0]; ai++; 1377 brow = bs*i; 1378 for (j=0; j<ncols; j++){ 1379 /* bcol = bs*(*aj); */ 1380 for (kcol=0; kcol<bs; kcol++){ 1381 for (krow=0; krow<bs; krow++){ 1382 atmp = PetscAbsScalar(*aa); aa++; 1383 row = brow + krow; /* row index */ 1384 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1385 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1386 } 1387 } 1388 aj++; 1389 } 1390 } 1391 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1397 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1398 { 1399 int ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1408 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1409 { 1410 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1411 PetscFunctionBegin; 1412 *array = a->a; 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1418 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1419 { 1420 PetscFunctionBegin; 1421 PetscFunctionReturn(0); 1422 } 1423 1424 /* -------------------------------------------------------------------*/ 1425 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1426 MatGetRow_SeqBAIJ, 1427 MatRestoreRow_SeqBAIJ, 1428 MatMult_SeqBAIJ_N, 1429 MatMultAdd_SeqBAIJ_N, 1430 MatMultTranspose_SeqBAIJ, 1431 MatMultTransposeAdd_SeqBAIJ, 1432 MatSolve_SeqBAIJ_N, 1433 0, 1434 0, 1435 0, 1436 MatLUFactor_SeqBAIJ, 1437 0, 1438 0, 1439 MatTranspose_SeqBAIJ, 1440 MatGetInfo_SeqBAIJ, 1441 MatEqual_SeqBAIJ, 1442 MatGetDiagonal_SeqBAIJ, 1443 MatDiagonalScale_SeqBAIJ, 1444 MatNorm_SeqBAIJ, 1445 0, 1446 MatAssemblyEnd_SeqBAIJ, 1447 0, 1448 MatSetOption_SeqBAIJ, 1449 MatZeroEntries_SeqBAIJ, 1450 MatZeroRows_SeqBAIJ, 1451 MatLUFactorSymbolic_SeqBAIJ, 1452 MatLUFactorNumeric_SeqBAIJ_N, 1453 0, 1454 0, 1455 MatSetUpPreallocation_SeqBAIJ, 1456 0, 1457 MatGetOwnershipRange_SeqBAIJ, 1458 MatILUFactorSymbolic_SeqBAIJ, 1459 0, 1460 MatGetArray_SeqBAIJ, 1461 MatRestoreArray_SeqBAIJ, 1462 MatDuplicate_SeqBAIJ, 1463 0, 1464 0, 1465 MatILUFactor_SeqBAIJ, 1466 0, 1467 0, 1468 MatGetSubMatrices_SeqBAIJ, 1469 MatIncreaseOverlap_SeqBAIJ, 1470 MatGetValues_SeqBAIJ, 1471 0, 1472 MatPrintHelp_SeqBAIJ, 1473 MatScale_SeqBAIJ, 1474 0, 1475 0, 1476 0, 1477 MatGetBlockSize_SeqBAIJ, 1478 MatGetRowIJ_SeqBAIJ, 1479 MatRestoreRowIJ_SeqBAIJ, 1480 0, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 MatSetValuesBlocked_SeqBAIJ, 1487 MatGetSubMatrix_SeqBAIJ, 1488 MatDestroy_SeqBAIJ, 1489 MatView_SeqBAIJ, 1490 MatGetMaps_Petsc, 1491 0, 1492 0, 1493 0, 1494 0, 1495 0, 1496 0, 1497 MatGetRowMax_SeqBAIJ, 1498 MatConvert_Basic}; 1499 1500 EXTERN_C_BEGIN 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1503 int MatStoreValues_SeqBAIJ(Mat mat) 1504 { 1505 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1506 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1507 int ierr; 1508 1509 PetscFunctionBegin; 1510 if (aij->nonew != 1) { 1511 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1512 } 1513 1514 /* allocate space for values if not already there */ 1515 if (!aij->saved_values) { 1516 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1517 } 1518 1519 /* copy values over */ 1520 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 EXTERN_C_END 1524 1525 EXTERN_C_BEGIN 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1528 int MatRetrieveValues_SeqBAIJ(Mat mat) 1529 { 1530 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1531 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1532 1533 PetscFunctionBegin; 1534 if (aij->nonew != 1) { 1535 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1536 } 1537 if (!aij->saved_values) { 1538 SETERRQ(1,"Must call MatStoreValues(A);first"); 1539 } 1540 1541 /* copy values over */ 1542 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 EXTERN_C_END 1546 1547 EXTERN_C_BEGIN 1548 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1549 EXTERN_C_END 1550 1551 EXTERN_C_BEGIN 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatCreate_SeqBAIJ" 1554 int MatCreate_SeqBAIJ(Mat B) 1555 { 1556 int ierr,size; 1557 Mat_SeqBAIJ *b; 1558 1559 PetscFunctionBegin; 1560 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1561 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1562 1563 B->m = B->M = PetscMax(B->m,B->M); 1564 B->n = B->N = PetscMax(B->n,B->N); 1565 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1566 B->data = (void*)b; 1567 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1568 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1569 B->factor = 0; 1570 B->lupivotthreshold = 1.0; 1571 B->mapping = 0; 1572 b->row = 0; 1573 b->col = 0; 1574 b->icol = 0; 1575 b->reallocs = 0; 1576 b->saved_values = 0; 1577 #if defined(PETSC_USE_MAT_SINGLE) 1578 b->setvalueslen = 0; 1579 b->setvaluescopy = PETSC_NULL; 1580 #endif 1581 1582 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1583 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1584 1585 b->sorted = PETSC_FALSE; 1586 b->roworiented = PETSC_TRUE; 1587 b->nonew = 0; 1588 b->diag = 0; 1589 b->solve_work = 0; 1590 b->mult_work = 0; 1591 b->spptr = 0; 1592 B->info.nz_unneeded = (PetscReal)b->maxnz; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1596 "MatStoreValues_SeqBAIJ", 1597 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1599 "MatRetrieveValues_SeqBAIJ", 1600 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1602 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1603 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1605 "MatConvert_SeqBAIJ_SeqAIJ", 1606 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 EXTERN_C_END 1610 1611 #undef __FUNCT__ 1612 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1613 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1614 { 1615 Mat C; 1616 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1617 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1618 1619 PetscFunctionBegin; 1620 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1621 1622 *B = 0; 1623 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1624 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1625 c = (Mat_SeqBAIJ*)C->data; 1626 1627 c->bs = a->bs; 1628 c->bs2 = a->bs2; 1629 c->mbs = a->mbs; 1630 c->nbs = a->nbs; 1631 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1632 1633 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1634 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1635 for (i=0; i<mbs; i++) { 1636 c->imax[i] = a->imax[i]; 1637 c->ilen[i] = a->ilen[i]; 1638 } 1639 1640 /* allocate the matrix space */ 1641 c->singlemalloc = PETSC_TRUE; 1642 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1643 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1644 c->j = (int*)(c->a + nz*bs2); 1645 c->i = c->j + nz; 1646 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1647 if (mbs > 0) { 1648 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1649 if (cpvalues == MAT_COPY_VALUES) { 1650 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1651 } else { 1652 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1653 } 1654 } 1655 1656 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1657 c->sorted = a->sorted; 1658 c->roworiented = a->roworiented; 1659 c->nonew = a->nonew; 1660 1661 if (a->diag) { 1662 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1663 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1664 for (i=0; i<mbs; i++) { 1665 c->diag[i] = a->diag[i]; 1666 } 1667 } else c->diag = 0; 1668 c->nz = a->nz; 1669 c->maxnz = a->maxnz; 1670 c->solve_work = 0; 1671 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1672 c->mult_work = 0; 1673 C->preallocated = PETSC_TRUE; 1674 C->assembled = PETSC_TRUE; 1675 *B = C; 1676 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 EXTERN_C_BEGIN 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatLoad_SeqBAIJ" 1683 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1684 { 1685 Mat_SeqBAIJ *a; 1686 Mat B; 1687 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1688 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1689 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1690 int *masked,nmask,tmp,bs2,ishift; 1691 Scalar *aa; 1692 MPI_Comm comm = ((PetscObject)viewer)->comm; 1693 1694 PetscFunctionBegin; 1695 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1696 bs2 = bs*bs; 1697 1698 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1699 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1700 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1701 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1702 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1703 M = header[1]; N = header[2]; nz = header[3]; 1704 1705 if (header[3] < 0) { 1706 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1707 } 1708 1709 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1710 1711 /* 1712 This code adds extra rows to make sure the number of rows is 1713 divisible by the blocksize 1714 */ 1715 mbs = M/bs; 1716 extra_rows = bs - M + bs*(mbs); 1717 if (extra_rows == bs) extra_rows = 0; 1718 else mbs++; 1719 if (extra_rows) { 1720 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1721 } 1722 1723 /* read in row lengths */ 1724 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1725 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1726 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1727 1728 /* read in column indices */ 1729 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1730 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1731 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1732 1733 /* loop over row lengths determining block row lengths */ 1734 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1735 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1736 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1737 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1738 masked = mask + mbs; 1739 rowcount = 0; nzcount = 0; 1740 for (i=0; i<mbs; i++) { 1741 nmask = 0; 1742 for (j=0; j<bs; j++) { 1743 kmax = rowlengths[rowcount]; 1744 for (k=0; k<kmax; k++) { 1745 tmp = jj[nzcount++]/bs; 1746 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1747 } 1748 rowcount++; 1749 } 1750 browlengths[i] += nmask; 1751 /* zero out the mask elements we set */ 1752 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1753 } 1754 1755 /* create our matrix */ 1756 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1757 B = *A; 1758 a = (Mat_SeqBAIJ*)B->data; 1759 1760 /* set matrix "i" values */ 1761 a->i[0] = 0; 1762 for (i=1; i<= mbs; i++) { 1763 a->i[i] = a->i[i-1] + browlengths[i-1]; 1764 a->ilen[i-1] = browlengths[i-1]; 1765 } 1766 a->nz = 0; 1767 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1768 1769 /* read in nonzero values */ 1770 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1771 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1772 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1773 1774 /* set "a" and "j" values into matrix */ 1775 nzcount = 0; jcount = 0; 1776 for (i=0; i<mbs; i++) { 1777 nzcountb = nzcount; 1778 nmask = 0; 1779 for (j=0; j<bs; j++) { 1780 kmax = rowlengths[i*bs+j]; 1781 for (k=0; k<kmax; k++) { 1782 tmp = jj[nzcount++]/bs; 1783 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1784 } 1785 } 1786 /* sort the masked values */ 1787 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1788 1789 /* set "j" values into matrix */ 1790 maskcount = 1; 1791 for (j=0; j<nmask; j++) { 1792 a->j[jcount++] = masked[j]; 1793 mask[masked[j]] = maskcount++; 1794 } 1795 /* set "a" values into matrix */ 1796 ishift = bs2*a->i[i]; 1797 for (j=0; j<bs; j++) { 1798 kmax = rowlengths[i*bs+j]; 1799 for (k=0; k<kmax; k++) { 1800 tmp = jj[nzcountb]/bs ; 1801 block = mask[tmp] - 1; 1802 point = jj[nzcountb] - bs*tmp; 1803 idx = ishift + bs2*block + j + bs*point; 1804 a->a[idx] = (MatScalar)aa[nzcountb++]; 1805 } 1806 } 1807 /* zero out the mask elements we set */ 1808 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1809 } 1810 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1811 1812 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1813 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1814 ierr = PetscFree(aa);CHKERRQ(ierr); 1815 ierr = PetscFree(jj);CHKERRQ(ierr); 1816 ierr = PetscFree(mask);CHKERRQ(ierr); 1817 1818 B->assembled = PETSC_TRUE; 1819 1820 ierr = MatView_Private(B);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 EXTERN_C_END 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatCreateSeqBAIJ" 1827 /*@C 1828 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1829 compressed row) format. For good matrix assembly performance the 1830 user should preallocate the matrix storage by setting the parameter nz 1831 (or the array nnz). By setting these parameters accurately, performance 1832 during matrix assembly can be increased by more than a factor of 50. 1833 1834 Collective on MPI_Comm 1835 1836 Input Parameters: 1837 + comm - MPI communicator, set to PETSC_COMM_SELF 1838 . bs - size of block 1839 . m - number of rows 1840 . n - number of columns 1841 . nz - number of nonzero blocks per block row (same for all rows) 1842 - nnz - array containing the number of nonzero blocks in the various block rows 1843 (possibly different for each block row) or PETSC_NULL 1844 1845 Output Parameter: 1846 . A - the matrix 1847 1848 Options Database Keys: 1849 . -mat_no_unroll - uses code that does not unroll the loops in the 1850 block calculations (much slower) 1851 . -mat_block_size - size of the blocks to use 1852 1853 Level: intermediate 1854 1855 Notes: 1856 A nonzero block is any block that as 1 or more nonzeros in it 1857 1858 The block AIJ format is fully compatible with standard Fortran 77 1859 storage. That is, the stored row and column indices can begin at 1860 either one (as in Fortran) or zero. See the users' manual for details. 1861 1862 Specify the preallocated storage with either nz or nnz (not both). 1863 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1864 allocation. For additional details, see the users manual chapter on 1865 matrices. 1866 1867 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1868 @*/ 1869 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1870 { 1871 int ierr; 1872 1873 PetscFunctionBegin; 1874 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1875 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1876 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1882 /*@C 1883 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1884 per row in the matrix. For good matrix assembly performance the 1885 user should preallocate the matrix storage by setting the parameter nz 1886 (or the array nnz). By setting these parameters accurately, performance 1887 during matrix assembly can be increased by more than a factor of 50. 1888 1889 Collective on MPI_Comm 1890 1891 Input Parameters: 1892 + A - the matrix 1893 . bs - size of block 1894 . nz - number of block nonzeros per block row (same for all rows) 1895 - nnz - array containing the number of block nonzeros in the various block rows 1896 (possibly different for each block row) or PETSC_NULL 1897 1898 Options Database Keys: 1899 . -mat_no_unroll - uses code that does not unroll the loops in the 1900 block calculations (much slower) 1901 . -mat_block_size - size of the blocks to use 1902 1903 Level: intermediate 1904 1905 Notes: 1906 The block AIJ format is fully compatible with standard Fortran 77 1907 storage. That is, the stored row and column indices can begin at 1908 either one (as in Fortran) or zero. See the users' manual for details. 1909 1910 Specify the preallocated storage with either nz or nnz (not both). 1911 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1912 allocation. For additional details, see the users manual chapter on 1913 matrices. 1914 1915 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1916 @*/ 1917 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1918 { 1919 Mat_SeqBAIJ *b; 1920 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1921 PetscTruth flg; 1922 1923 PetscFunctionBegin; 1924 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1925 if (!flg) PetscFunctionReturn(0); 1926 1927 B->preallocated = PETSC_TRUE; 1928 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1929 if (nnz && newbs != bs) { 1930 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1931 } 1932 bs = newbs; 1933 1934 mbs = B->m/bs; 1935 nbs = B->n/bs; 1936 bs2 = bs*bs; 1937 1938 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1939 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1940 } 1941 1942 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1943 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1944 if (nnz) { 1945 for (i=0; i<mbs; i++) { 1946 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1947 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1948 } 1949 } 1950 1951 b = (Mat_SeqBAIJ*)B->data; 1952 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1953 if (!flg) { 1954 switch (bs) { 1955 case 1: 1956 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1957 B->ops->solve = MatSolve_SeqBAIJ_1; 1958 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1959 B->ops->mult = MatMult_SeqBAIJ_1; 1960 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1961 break; 1962 case 2: 1963 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1964 B->ops->solve = MatSolve_SeqBAIJ_2; 1965 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1966 B->ops->mult = MatMult_SeqBAIJ_2; 1967 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1968 break; 1969 case 3: 1970 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1971 B->ops->solve = MatSolve_SeqBAIJ_3; 1972 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1973 B->ops->mult = MatMult_SeqBAIJ_3; 1974 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1975 break; 1976 case 4: 1977 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1978 B->ops->solve = MatSolve_SeqBAIJ_4; 1979 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1980 B->ops->mult = MatMult_SeqBAIJ_4; 1981 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1982 break; 1983 case 5: 1984 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1985 B->ops->solve = MatSolve_SeqBAIJ_5; 1986 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1987 B->ops->mult = MatMult_SeqBAIJ_5; 1988 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1989 break; 1990 case 6: 1991 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1992 B->ops->solve = MatSolve_SeqBAIJ_6; 1993 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1994 B->ops->mult = MatMult_SeqBAIJ_6; 1995 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1996 break; 1997 case 7: 1998 B->ops->mult = MatMult_SeqBAIJ_7; 1999 B->ops->solve = MatSolve_SeqBAIJ_7; 2000 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 2001 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2002 break; 2003 default: 2004 B->ops->mult = MatMult_SeqBAIJ_N; 2005 B->ops->solve = MatSolve_SeqBAIJ_N; 2006 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2007 break; 2008 } 2009 } 2010 b->bs = bs; 2011 b->mbs = mbs; 2012 b->nbs = nbs; 2013 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2014 if (!nnz) { 2015 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2016 else if (nz <= 0) nz = 1; 2017 for (i=0; i<mbs; i++) b->imax[i] = nz; 2018 nz = nz*mbs; 2019 } else { 2020 nz = 0; 2021 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2022 } 2023 2024 /* allocate the matrix space */ 2025 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2026 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2027 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2028 b->j = (int*)(b->a + nz*bs2); 2029 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2030 b->i = b->j + nz; 2031 b->singlemalloc = PETSC_TRUE; 2032 2033 b->i[0] = 0; 2034 for (i=1; i<mbs+1; i++) { 2035 b->i[i] = b->i[i-1] + b->imax[i-1]; 2036 } 2037 2038 /* b->ilen will count nonzeros in each block row so far. */ 2039 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2040 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2041 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2042 2043 b->bs = bs; 2044 b->bs2 = bs2; 2045 b->mbs = mbs; 2046 b->nz = 0; 2047 b->maxnz = nz*bs2; 2048 B->info.nz_unneeded = (PetscReal)b->maxnz; 2049 PetscFunctionReturn(0); 2050 } 2051 2052