1 /*$Id: baij.c,v 1.198 2000/01/26 22:08:15 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 37 int MatMarkDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 40 int i,j,*diag,m = a->mbs; 41 42 PetscFunctionBegin; 43 if (a->diag) PetscFunctionReturn(0); 44 45 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 46 PLogObjectMemory(A,(m+1)*sizeof(int)); 47 for (i=0; i<m; i++) { 48 diag[i] = a->i[i+1]; 49 for (j=a->i[i]; j<a->i[i+1]; j++) { 50 if (a->j[j] == i) { 51 diag[i] = j; 52 break; 53 } 54 } 55 } 56 a->diag = diag; 57 PetscFunctionReturn(0); 58 } 59 60 61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 62 63 #undef __FUNC__ 64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 66 { 67 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 68 int ierr,n = a->mbs,i; 69 70 PetscFunctionBegin; 71 *nn = n; 72 if (!ia) PetscFunctionReturn(0); 73 if (symmetric) { 74 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 75 } else if (oshift == 1) { 76 /* temporarily add 1 to i and j indices */ 77 int nz = a->i[n] + 1; 78 for (i=0; i<nz; i++) a->j[i]++; 79 for (i=0; i<n+1; i++) a->i[i]++; 80 *ia = a->i; *ja = a->j; 81 } else { 82 *ia = a->i; *ja = a->j; 83 } 84 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 91 PetscTruth *done) 92 { 93 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 94 int i,n = a->mbs,ierr; 95 96 PetscFunctionBegin; 97 if (!ia) PetscFunctionReturn(0); 98 if (symmetric) { 99 ierr = PetscFree(*ia);CHKERRQ(ierr); 100 ierr = PetscFree(*ja);CHKERRQ(ierr); 101 } else if (oshift == 1) { 102 int nz = a->i[n]; 103 for (i=0; i<nz; i++) a->j[i]--; 104 for (i=0; i<n+1; i++) a->i[i]--; 105 } 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNC__ 110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 112 { 113 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 114 115 PetscFunctionBegin; 116 *bs = baij->bs; 117 PetscFunctionReturn(0); 118 } 119 120 121 #undef __FUNC__ 122 #define __FUNC__ "MatDestroy_SeqBAIJ" 123 int MatDestroy_SeqBAIJ(Mat A) 124 { 125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 126 int ierr; 127 128 PetscFunctionBegin; 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(PETSC_USE_LOG) 142 PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 ierr = PetscFree(a);CHKERRQ(ierr); 163 PLogObjectDestroy(A); 164 PetscHeaderDestroy(A); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNC__ 169 #define __FUNC__ "MatSetOption_SeqBAIJ" 170 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 174 PetscFunctionBegin; 175 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 176 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 177 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 178 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 179 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 180 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 181 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 182 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 183 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 184 else if (op == MAT_ROWS_SORTED || 185 op == MAT_ROWS_UNSORTED || 186 op == MAT_SYMMETRIC || 187 op == MAT_STRUCTURALLY_SYMMETRIC || 188 op == MAT_YES_NEW_DIAGONALS || 189 op == MAT_IGNORE_OFF_PROC_ENTRIES || 190 op == MAT_USE_HASH_TABLE) { 191 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 192 } else if (op == MAT_NO_NEW_DIAGONALS) { 193 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 194 } else { 195 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 201 #undef __FUNC__ 202 #define __FUNC__ "MatGetSize_SeqBAIJ" 203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 204 { 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 206 207 PetscFunctionBegin; 208 if (m) *m = a->m; 209 if (n) *n = a->n; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 219 PetscFunctionBegin; 220 if (m) *m = 0; 221 if (n) *n = a->m; 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNC__ 226 #define __FUNC__ "MatGetRow_SeqBAIJ" 227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 230 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 231 MatScalar *aa,*aa_i; 232 Scalar *v_i; 233 234 PetscFunctionBegin; 235 bs = a->bs; 236 ai = a->i; 237 aj = a->j; 238 aa = a->a; 239 bs2 = a->bs2; 240 241 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 242 243 bn = row/bs; /* Block number */ 244 bp = row % bs; /* Block Position */ 245 M = ai[bn+1] - ai[bn]; 246 *nz = bs*M; 247 248 if (v) { 249 *v = 0; 250 if (*nz) { 251 *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v); 252 for (i=0; i<M; i++) { /* for each block in the block row */ 253 v_i = *v + i*bs; 254 aa_i = aa + bs2*(ai[bn] + i); 255 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 256 } 257 } 258 } 259 260 if (idx) { 261 *idx = 0; 262 if (*nz) { 263 *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx); 264 for (i=0; i<M; i++) { /* for each block in the block row */ 265 idx_i = *idx + i*bs; 266 itmp = bs*aj[ai[bn] + i]; 267 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNC__ 275 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 277 { 278 int ierr; 279 280 PetscFunctionBegin; 281 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 282 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNC__ 287 #define __FUNC__ "MatTranspose_SeqBAIJ" 288 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 289 { 290 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 291 Mat C; 292 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 293 int *rows,*cols,bs2=a->bs2,refct; 294 MatScalar *array = a->a; 295 296 PetscFunctionBegin; 297 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 298 col = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 299 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 300 301 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 302 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 303 ierr = PetscFree(col);CHKERRQ(ierr); 304 rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 305 cols = rows + bs; 306 for (i=0; i<mbs; i++) { 307 cols[0] = i*bs; 308 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 309 len = ai[i+1] - ai[i]; 310 for (j=0; j<len; j++) { 311 rows[0] = (*aj++)*bs; 312 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 313 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 314 array += bs2; 315 } 316 } 317 ierr = PetscFree(rows);CHKERRQ(ierr); 318 319 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 322 if (B) { 323 *B = C; 324 } else { 325 PetscOps *Abops; 326 MatOps Aops; 327 328 /* This isn't really an in-place transpose */ 329 ierr = PetscFree(a->a);CHKERRQ(ierr); 330 if (!a->singlemalloc) { 331 ierr = PetscFree(a->i);CHKERRQ(ierr); 332 ierr = PetscFree(a->j);CHKERRQ(ierr); 333 } 334 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 335 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 336 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 337 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 338 ierr = PetscFree(a);CHKERRQ(ierr); 339 340 341 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 342 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 343 344 /* 345 This is horrible, horrible code. We need to keep the 346 A pointers for the bops and ops but copy everything 347 else from C. 348 */ 349 Abops = A->bops; 350 Aops = A->ops; 351 refct = A->refct; 352 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 353 A->bops = Abops; 354 A->ops = Aops; 355 A->qlist = 0; 356 A->refct = refct; 357 358 PetscHeaderDestroy(C); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNC__ 364 #define __FUNC__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = a->m; 378 col_lens[2] = a->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNC__ 429 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 434 char *outputname; 435 436 PetscFunctionBegin; 437 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 438 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 439 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 440 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 441 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 442 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 443 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 444 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 445 for (i=0; i<a->mbs; i++) { 446 for (j=0; j<bs; j++) { 447 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 448 for (k=a->i[i]; k<a->i[i+1]; k++) { 449 for (l=0; l<bs; l++) { 450 #if defined(PETSC_USE_COMPLEX) 451 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 452 ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 453 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 455 ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 456 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 457 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 458 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 459 } 460 #else 461 if (a->a[bs2*k + l*bs + j] != 0.0) { 462 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 463 } 464 #endif 465 } 466 } 467 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 468 } 469 } 470 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 471 } else { 472 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 473 for (i=0; i<a->mbs; i++) { 474 for (j=0; j<bs; j++) { 475 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 476 for (k=a->i[i]; k<a->i[i+1]; k++) { 477 for (l=0; l<bs; l++) { 478 #if defined(PETSC_USE_COMPLEX) 479 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 480 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 481 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 483 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 484 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 485 } else { 486 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 487 } 488 #else 489 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 490 #endif 491 } 492 } 493 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 494 } 495 } 496 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 497 } 498 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNC__ 503 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 504 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 505 { 506 Mat A = (Mat) Aa; 507 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 508 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 509 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 510 MatScalar *aa; 511 Viewer viewer; 512 513 PetscFunctionBegin; 514 515 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 516 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 517 518 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 519 520 /* loop over matrix elements drawing boxes */ 521 color = DRAW_BLUE; 522 for (i=0,row=0; i<mbs; i++,row+=bs) { 523 for (j=a->i[i]; j<a->i[i+1]; j++) { 524 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 525 x_l = a->j[j]*bs; x_r = x_l + 1.0; 526 aa = a->a + j*bs2; 527 for (k=0; k<bs; k++) { 528 for (l=0; l<bs; l++) { 529 if (PetscRealPart(*aa++) >= 0.) continue; 530 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 531 } 532 } 533 } 534 } 535 color = DRAW_CYAN; 536 for (i=0,row=0; i<mbs; i++,row+=bs) { 537 for (j=a->i[i]; j<a->i[i+1]; j++) { 538 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 539 x_l = a->j[j]*bs; x_r = x_l + 1.0; 540 aa = a->a + j*bs2; 541 for (k=0; k<bs; k++) { 542 for (l=0; l<bs; l++) { 543 if (PetscRealPart(*aa++) != 0.) continue; 544 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 545 } 546 } 547 } 548 } 549 550 color = DRAW_RED; 551 for (i=0,row=0; i<mbs; i++,row+=bs) { 552 for (j=a->i[i]; j<a->i[i+1]; j++) { 553 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 554 x_l = a->j[j]*bs; x_r = x_l + 1.0; 555 aa = a->a + j*bs2; 556 for (k=0; k<bs; k++) { 557 for (l=0; l<bs; l++) { 558 if (PetscRealPart(*aa++) <= 0.) continue; 559 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 560 } 561 } 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNC__ 568 #define __FUNC__ "MatView_SeqBAIJ_Draw" 569 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 570 { 571 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 572 int ierr; 573 PetscReal xl,yl,xr,yr,w,h; 574 Draw draw; 575 PetscTruth isnull; 576 577 PetscFunctionBegin; 578 579 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 580 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 581 582 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 583 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 584 xr += w; yr += h; xl = -w; yl = -h; 585 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 586 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 587 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNC__ 592 #define __FUNC__ "MatView_SeqBAIJ" 593 int MatView_SeqBAIJ(Mat A,Viewer viewer) 594 { 595 int ierr; 596 PetscTruth issocket,isascii,isbinary,isdraw; 597 598 PetscFunctionBegin; 599 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 601 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 602 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 603 if (issocket) { 604 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 605 } else if (isascii){ 606 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 607 } else if (isbinary) { 608 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 609 } else if (isdraw) { 610 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 611 } else { 612 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 613 } 614 PetscFunctionReturn(0); 615 } 616 617 618 #undef __FUNC__ 619 #define __FUNC__ "MatGetValues_SeqBAIJ" 620 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 621 { 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 623 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 624 int *ai = a->i,*ailen = a->ilen; 625 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 626 MatScalar *ap,*aa = a->a,zero = 0.0; 627 628 PetscFunctionBegin; 629 for (k=0; k<m; k++) { /* loop over rows */ 630 row = im[k]; brow = row/bs; 631 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 632 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 633 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 634 nrow = ailen[brow]; 635 for (l=0; l<n; l++) { /* loop over columns */ 636 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 637 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 638 col = in[l] ; 639 bcol = col/bs; 640 cidx = col%bs; 641 ridx = row%bs; 642 high = nrow; 643 low = 0; /* assume unsorted */ 644 while (high-low > 5) { 645 t = (low+high)/2; 646 if (rp[t] > bcol) high = t; 647 else low = t; 648 } 649 for (i=low; i<high; i++) { 650 if (rp[i] > bcol) break; 651 if (rp[i] == bcol) { 652 *v++ = ap[bs2*i+bs*cidx+ridx]; 653 goto finished; 654 } 655 } 656 *v++ = zero; 657 finished:; 658 } 659 } 660 PetscFunctionReturn(0); 661 } 662 663 664 #undef __FUNC__ 665 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 666 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 667 { 668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 669 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 670 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 671 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 672 Scalar *value = v; 673 MatScalar *ap,*aa=a->a,*bap; 674 675 PetscFunctionBegin; 676 if (roworiented) { 677 stepval = (n-1)*bs; 678 } else { 679 stepval = (m-1)*bs; 680 } 681 for (k=0; k<m; k++) { /* loop over added rows */ 682 row = im[k]; 683 if (row < 0) continue; 684 #if defined(PETSC_USE_BOPT_g) 685 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 686 #endif 687 rp = aj + ai[row]; 688 ap = aa + bs2*ai[row]; 689 rmax = imax[row]; 690 nrow = ailen[row]; 691 low = 0; 692 for (l=0; l<n; l++) { /* loop over added columns */ 693 if (in[l] < 0) continue; 694 #if defined(PETSC_USE_BOPT_g) 695 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 696 #endif 697 col = in[l]; 698 if (roworiented) { 699 value = v + k*(stepval+bs)*bs + l*bs; 700 } else { 701 value = v + l*(stepval+bs)*bs + k*bs; 702 } 703 if (!sorted) low = 0; high = nrow; 704 while (high-low > 7) { 705 t = (low+high)/2; 706 if (rp[t] > col) high = t; 707 else low = t; 708 } 709 for (i=low; i<high; i++) { 710 if (rp[i] > col) break; 711 if (rp[i] == col) { 712 bap = ap + bs2*i; 713 if (roworiented) { 714 if (is == ADD_VALUES) { 715 for (ii=0; ii<bs; ii++,value+=stepval) { 716 for (jj=ii; jj<bs2; jj+=bs) { 717 bap[jj] += *value++; 718 } 719 } 720 } else { 721 for (ii=0; ii<bs; ii++,value+=stepval) { 722 for (jj=ii; jj<bs2; jj+=bs) { 723 bap[jj] = *value++; 724 } 725 } 726 } 727 } else { 728 if (is == ADD_VALUES) { 729 for (ii=0; ii<bs; ii++,value+=stepval) { 730 for (jj=0; jj<bs; jj++) { 731 *bap++ += *value++; 732 } 733 } 734 } else { 735 for (ii=0; ii<bs; ii++,value+=stepval) { 736 for (jj=0; jj<bs; jj++) { 737 *bap++ = *value++; 738 } 739 } 740 } 741 } 742 goto noinsert2; 743 } 744 } 745 if (nonew == 1) goto noinsert2; 746 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 747 if (nrow >= rmax) { 748 /* there is no extra room in row, therefore enlarge */ 749 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 750 MatScalar *new_a; 751 752 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 753 754 /* malloc new storage space */ 755 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 756 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 757 new_j = (int*)(new_a + bs2*new_nz); 758 new_i = new_j + new_nz; 759 760 /* copy over old data into new slots */ 761 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 762 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 763 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 764 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 765 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 766 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 767 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 768 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 769 /* free up old matrix storage */ 770 ierr = PetscFree(a->a);CHKERRQ(ierr); 771 if (!a->singlemalloc) { 772 ierr = PetscFree(a->i);CHKERRQ(ierr); 773 ierr = PetscFree(a->j);CHKERRQ(ierr); 774 } 775 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 776 a->singlemalloc = PETSC_TRUE; 777 778 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 779 rmax = imax[row] = imax[row] + CHUNKSIZE; 780 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 781 a->maxnz += bs2*CHUNKSIZE; 782 a->reallocs++; 783 a->nz++; 784 } 785 N = nrow++ - 1; 786 /* shift up all the later entries in this row */ 787 for (ii=N; ii>=i; ii--) { 788 rp[ii+1] = rp[ii]; 789 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 790 } 791 if (N >= i) { 792 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 793 } 794 rp[i] = col; 795 bap = ap + bs2*i; 796 if (roworiented) { 797 for (ii=0; ii<bs; ii++,value+=stepval) { 798 for (jj=ii; jj<bs2; jj+=bs) { 799 bap[jj] = *value++; 800 } 801 } 802 } else { 803 for (ii=0; ii<bs; ii++,value+=stepval) { 804 for (jj=0; jj<bs; jj++) { 805 *bap++ = *value++; 806 } 807 } 808 } 809 noinsert2:; 810 low = i; 811 } 812 ailen[row] = nrow; 813 } 814 PetscFunctionReturn(0); 815 } 816 817 818 #undef __FUNC__ 819 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 820 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 823 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 824 int m = a->m,*ip,N,*ailen = a->ilen; 825 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 826 MatScalar *aa = a->a,*ap; 827 828 PetscFunctionBegin; 829 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 830 831 if (m) rmax = ailen[0]; 832 for (i=1; i<mbs; i++) { 833 /* move each row back by the amount of empty slots (fshift) before it*/ 834 fshift += imax[i-1] - ailen[i-1]; 835 rmax = PetscMax(rmax,ailen[i]); 836 if (fshift) { 837 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 838 N = ailen[i]; 839 for (j=0; j<N; j++) { 840 ip[j-fshift] = ip[j]; 841 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 842 } 843 } 844 ai[i] = ai[i-1] + ailen[i-1]; 845 } 846 if (mbs) { 847 fshift += imax[mbs-1] - ailen[mbs-1]; 848 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 849 } 850 /* reset ilen and imax for each row */ 851 for (i=0; i<mbs; i++) { 852 ailen[i] = imax[i] = ai[i+1] - ai[i]; 853 } 854 a->nz = ai[mbs]; 855 856 /* diagonals may have moved, so kill the diagonal pointers */ 857 if (fshift && a->diag) { 858 ierr = PetscFree(a->diag);CHKERRQ(ierr); 859 PLogObjectMemory(A,-(m+1)*sizeof(int)); 860 a->diag = 0; 861 } 862 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 863 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 864 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 865 a->reallocs); 866 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 867 a->reallocs = 0; 868 A->info.nz_unneeded = (PetscReal)fshift*bs2; 869 870 PetscFunctionReturn(0); 871 } 872 873 874 875 /* 876 This function returns an array of flags which indicate the locations of contiguous 877 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 878 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 879 Assume: sizes should be long enough to hold all the values. 880 */ 881 #undef __FUNC__ 882 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 883 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 884 { 885 int i,j,k,row; 886 PetscTruth flg; 887 888 PetscFunctionBegin; 889 for (i=0,j=0; i<n; j++) { 890 row = idx[i]; 891 if (row%bs!=0) { /* Not the begining of a block */ 892 sizes[j] = 1; 893 i++; 894 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 895 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 896 i++; 897 } else { /* Begining of the block, so check if the complete block exists */ 898 flg = PETSC_TRUE; 899 for (k=1; k<bs; k++) { 900 if (row+k != idx[i+k]) { /* break in the block */ 901 flg = PETSC_FALSE; 902 break; 903 } 904 } 905 if (flg == PETSC_TRUE) { /* No break in the bs */ 906 sizes[j] = bs; 907 i+= bs; 908 } else { 909 sizes[j] = 1; 910 i++; 911 } 912 } 913 } 914 *bs_max = j; 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNC__ 919 #define __FUNC__ "MatZeroRows_SeqBAIJ" 920 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 921 { 922 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 923 int ierr,i,j,k,count,is_n,*is_idx,*rows; 924 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 925 Scalar zero = 0.0; 926 MatScalar *aa; 927 928 PetscFunctionBegin; 929 /* Make a copy of the IS and sort it */ 930 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 931 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 932 933 /* allocate memory for rows,sizes */ 934 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 935 sizes = rows + is_n; 936 937 /* initialize copy IS valurs to rows, and sort them */ 938 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 939 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 940 if (baij->keepzeroedrows) { 941 for (i=0; i<is_n; i++) { sizes[i] = 1; } 942 bs_max = is_n; 943 } else { 944 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 945 } 946 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 947 948 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 949 row = rows[j]; 950 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 951 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 952 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 953 if (sizes[i] == bs && !baij->keepzeroedrows) { 954 if (diag) { 955 if (baij->ilen[row/bs] > 0) { 956 baij->ilen[row/bs] = 1; 957 baij->j[baij->i[row/bs]] = row/bs; 958 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 959 } 960 /* Now insert all the diagoanl values for this bs */ 961 for (k=0; k<bs; k++) { 962 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 963 } 964 } else { /* (!diag) */ 965 baij->ilen[row/bs] = 0; 966 } /* end (!diag) */ 967 } else { /* (sizes[i] != bs) */ 968 #if defined (PETSC_USE_DEBUG) 969 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 970 #endif 971 for (k=0; k<count; k++) { 972 aa[0] = zero; 973 aa+=bs; 974 } 975 if (diag) { 976 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 977 } 978 } 979 } 980 981 ierr = PetscFree(rows);CHKERRQ(ierr); 982 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNC__ 987 #define __FUNC__ "MatSetValues_SeqBAIJ" 988 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 989 { 990 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 991 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 992 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 993 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 994 int ridx,cidx,bs2=a->bs2,ierr; 995 MatScalar *ap,value,*aa=a->a,*bap; 996 997 PetscFunctionBegin; 998 for (k=0; k<m; k++) { /* loop over added rows */ 999 row = im[k]; brow = row/bs; 1000 if (row < 0) continue; 1001 #if defined(PETSC_USE_BOPT_g) 1002 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1003 #endif 1004 rp = aj + ai[brow]; 1005 ap = aa + bs2*ai[brow]; 1006 rmax = imax[brow]; 1007 nrow = ailen[brow]; 1008 low = 0; 1009 for (l=0; l<n; l++) { /* loop over added columns */ 1010 if (in[l] < 0) continue; 1011 #if defined(PETSC_USE_BOPT_g) 1012 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1013 #endif 1014 col = in[l]; bcol = col/bs; 1015 ridx = row % bs; cidx = col % bs; 1016 if (roworiented) { 1017 value = v[l + k*n]; 1018 } else { 1019 value = v[k + l*m]; 1020 } 1021 if (!sorted) low = 0; high = nrow; 1022 while (high-low > 7) { 1023 t = (low+high)/2; 1024 if (rp[t] > bcol) high = t; 1025 else low = t; 1026 } 1027 for (i=low; i<high; i++) { 1028 if (rp[i] > bcol) break; 1029 if (rp[i] == bcol) { 1030 bap = ap + bs2*i + bs*cidx + ridx; 1031 if (is == ADD_VALUES) *bap += value; 1032 else *bap = value; 1033 goto noinsert1; 1034 } 1035 } 1036 if (nonew == 1) goto noinsert1; 1037 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1038 if (nrow >= rmax) { 1039 /* there is no extra room in row, therefore enlarge */ 1040 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1041 MatScalar *new_a; 1042 1043 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1044 1045 /* Malloc new storage space */ 1046 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1047 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 1048 new_j = (int*)(new_a + bs2*new_nz); 1049 new_i = new_j + new_nz; 1050 1051 /* copy over old data into new slots */ 1052 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1053 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1054 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1055 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1056 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1057 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1058 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1059 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1060 /* free up old matrix storage */ 1061 ierr = PetscFree(a->a);CHKERRQ(ierr); 1062 if (!a->singlemalloc) { 1063 ierr = PetscFree(a->i);CHKERRQ(ierr); 1064 ierr = PetscFree(a->j);CHKERRQ(ierr); 1065 } 1066 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1067 a->singlemalloc = PETSC_TRUE; 1068 1069 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1070 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1071 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1072 a->maxnz += bs2*CHUNKSIZE; 1073 a->reallocs++; 1074 a->nz++; 1075 } 1076 N = nrow++ - 1; 1077 /* shift up all the later entries in this row */ 1078 for (ii=N; ii>=i; ii--) { 1079 rp[ii+1] = rp[ii]; 1080 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 } 1082 if (N>=i) { 1083 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1084 } 1085 rp[i] = bcol; 1086 ap[bs2*i + bs*cidx + ridx] = value; 1087 noinsert1:; 1088 low = i; 1089 } 1090 ailen[brow] = nrow; 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*); 1096 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal); 1097 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1098 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1099 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1100 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1101 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1102 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1103 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1104 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1105 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1106 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1107 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1108 extern int MatZeroEntries_SeqBAIJ(Mat); 1109 1110 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1111 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1112 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1113 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1114 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1115 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1116 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1117 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1118 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1119 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1120 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1121 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1122 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1123 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1124 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1125 1126 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1127 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1128 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1129 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1130 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1131 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1132 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1133 1134 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1135 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1136 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1137 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1138 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1139 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1140 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1141 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1142 1143 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1144 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1145 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1146 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1147 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1148 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1149 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1150 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1151 1152 #undef __FUNC__ 1153 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1154 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1155 { 1156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1157 Mat outA; 1158 int ierr; 1159 PetscTruth row_identity,col_identity; 1160 1161 PetscFunctionBegin; 1162 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1163 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1164 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1165 if (!row_identity || !col_identity) { 1166 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1167 } 1168 1169 outA = inA; 1170 inA->factor = FACTOR_LU; 1171 1172 if (!a->diag) { 1173 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1174 } 1175 /* 1176 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1177 for ILU(0) factorization with natural ordering 1178 */ 1179 switch (a->bs) { 1180 case 1: 1181 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1182 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1183 case 2: 1184 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1185 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1186 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1187 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1188 break; 1189 case 3: 1190 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1191 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1192 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1193 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1194 break; 1195 case 4: 1196 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1197 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1198 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1199 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1200 break; 1201 case 5: 1202 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1203 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1204 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1205 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1206 break; 1207 case 6: 1208 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1209 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1210 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1211 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1212 break; 1213 case 7: 1214 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1215 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1217 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1218 break; 1219 default: 1220 a->row = row; 1221 a->col = col; 1222 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1223 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1224 1225 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1226 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1227 PLogObjectParent(inA,a->icol); 1228 1229 if (!a->solve_work) { 1230 a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1231 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1232 } 1233 } 1234 1235 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1236 1237 PetscFunctionReturn(0); 1238 } 1239 #undef __FUNC__ 1240 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1241 int MatPrintHelp_SeqBAIJ(Mat A) 1242 { 1243 static PetscTruth called = PETSC_FALSE; 1244 MPI_Comm comm = A->comm; 1245 int ierr; 1246 1247 PetscFunctionBegin; 1248 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1249 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1250 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 EXTERN_C_BEGIN 1255 #undef __FUNC__ 1256 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1257 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1258 { 1259 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1260 int i,nz,n; 1261 1262 PetscFunctionBegin; 1263 nz = baij->maxnz; 1264 n = baij->n; 1265 for (i=0; i<nz; i++) { 1266 baij->j[i] = indices[i]; 1267 } 1268 baij->nz = nz; 1269 for (i=0; i<n; i++) { 1270 baij->ilen[i] = baij->imax[i]; 1271 } 1272 1273 PetscFunctionReturn(0); 1274 } 1275 EXTERN_C_END 1276 1277 #undef __FUNC__ 1278 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1279 /*@ 1280 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1281 in the matrix. 1282 1283 Input Parameters: 1284 + mat - the SeqBAIJ matrix 1285 - indices - the column indices 1286 1287 Level: advanced 1288 1289 Notes: 1290 This can be called if you have precomputed the nonzero structure of the 1291 matrix and want to provide it to the matrix object to improve the performance 1292 of the MatSetValues() operation. 1293 1294 You MUST have set the correct numbers of nonzeros per row in the call to 1295 MatCreateSeqBAIJ(). 1296 1297 MUST be called before any calls to MatSetValues(); 1298 1299 @*/ 1300 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1301 { 1302 int ierr,(*f)(Mat,int *); 1303 1304 PetscFunctionBegin; 1305 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1306 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1307 if (f) { 1308 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1309 } else { 1310 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1311 } 1312 PetscFunctionReturn(0); 1313 } 1314 1315 /* -------------------------------------------------------------------*/ 1316 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1317 MatGetRow_SeqBAIJ, 1318 MatRestoreRow_SeqBAIJ, 1319 MatMult_SeqBAIJ_N, 1320 MatMultAdd_SeqBAIJ_N, 1321 MatMultTranspose_SeqBAIJ, 1322 MatMultTransposeAdd_SeqBAIJ, 1323 MatSolve_SeqBAIJ_N, 1324 0, 1325 0, 1326 0, 1327 MatLUFactor_SeqBAIJ, 1328 0, 1329 0, 1330 MatTranspose_SeqBAIJ, 1331 MatGetInfo_SeqBAIJ, 1332 MatEqual_SeqBAIJ, 1333 MatGetDiagonal_SeqBAIJ, 1334 MatDiagonalScale_SeqBAIJ, 1335 MatNorm_SeqBAIJ, 1336 0, 1337 MatAssemblyEnd_SeqBAIJ, 1338 0, 1339 MatSetOption_SeqBAIJ, 1340 MatZeroEntries_SeqBAIJ, 1341 MatZeroRows_SeqBAIJ, 1342 MatLUFactorSymbolic_SeqBAIJ, 1343 MatLUFactorNumeric_SeqBAIJ_N, 1344 0, 1345 0, 1346 MatGetSize_SeqBAIJ, 1347 MatGetSize_SeqBAIJ, 1348 MatGetOwnershipRange_SeqBAIJ, 1349 MatILUFactorSymbolic_SeqBAIJ, 1350 0, 1351 0, 1352 0, 1353 MatDuplicate_SeqBAIJ, 1354 0, 1355 0, 1356 MatILUFactor_SeqBAIJ, 1357 0, 1358 0, 1359 MatGetSubMatrices_SeqBAIJ, 1360 MatIncreaseOverlap_SeqBAIJ, 1361 MatGetValues_SeqBAIJ, 1362 0, 1363 MatPrintHelp_SeqBAIJ, 1364 MatScale_SeqBAIJ, 1365 0, 1366 0, 1367 0, 1368 MatGetBlockSize_SeqBAIJ, 1369 MatGetRowIJ_SeqBAIJ, 1370 MatRestoreRowIJ_SeqBAIJ, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 MatSetValuesBlocked_SeqBAIJ, 1378 MatGetSubMatrix_SeqBAIJ, 1379 0, 1380 0, 1381 MatGetMaps_Petsc}; 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNC__ 1385 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1386 int MatStoreValues_SeqBAIJ(Mat mat) 1387 { 1388 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1389 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1390 int ierr; 1391 1392 PetscFunctionBegin; 1393 if (aij->nonew != 1) { 1394 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1395 } 1396 1397 /* allocate space for values if not already there */ 1398 if (!aij->saved_values) { 1399 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1400 } 1401 1402 /* copy values over */ 1403 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 EXTERN_C_END 1407 1408 EXTERN_C_BEGIN 1409 #undef __FUNC__ 1410 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1411 int MatRetrieveValues_SeqBAIJ(Mat mat) 1412 { 1413 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1414 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1415 1416 PetscFunctionBegin; 1417 if (aij->nonew != 1) { 1418 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1419 } 1420 if (!aij->saved_values) { 1421 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1422 } 1423 1424 /* copy values over */ 1425 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 EXTERN_C_END 1429 1430 #undef __FUNC__ 1431 #define __FUNC__ "MatCreateSeqBAIJ" 1432 /*@C 1433 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1434 compressed row) format. For good matrix assembly performance the 1435 user should preallocate the matrix storage by setting the parameter nz 1436 (or the array nnz). By setting these parameters accurately, performance 1437 during matrix assembly can be increased by more than a factor of 50. 1438 1439 Collective on MPI_Comm 1440 1441 Input Parameters: 1442 + comm - MPI communicator, set to PETSC_COMM_SELF 1443 . bs - size of block 1444 . m - number of rows 1445 . n - number of columns 1446 . nz - number of block nonzeros per block row (same for all rows) 1447 - nnz - array containing the number of block nonzeros in the various block rows 1448 (possibly different for each block row) or PETSC_NULL 1449 1450 Output Parameter: 1451 . A - the matrix 1452 1453 Options Database Keys: 1454 . -mat_no_unroll - uses code that does not unroll the loops in the 1455 block calculations (much slower) 1456 . -mat_block_size - size of the blocks to use 1457 1458 Level: intermediate 1459 1460 Notes: 1461 The block AIJ format is fully compatible with standard Fortran 77 1462 storage. That is, the stored row and column indices can begin at 1463 either one (as in Fortran) or zero. See the users' manual for details. 1464 1465 Specify the preallocated storage with either nz or nnz (not both). 1466 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1467 allocation. For additional details, see the users manual chapter on 1468 matrices. 1469 1470 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1471 @*/ 1472 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1473 { 1474 Mat B; 1475 Mat_SeqBAIJ *b; 1476 int i,len,ierr,mbs,nbs,bs2,size; 1477 PetscTruth flg; 1478 1479 PetscFunctionBegin; 1480 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1481 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1482 1483 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1484 mbs = m/bs; 1485 nbs = n/bs; 1486 bs2 = bs*bs; 1487 1488 if (mbs*bs!=m || nbs*bs!=n) { 1489 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1490 } 1491 1492 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1493 if (nnz) { 1494 for (i=0; i<mbs; i++) { 1495 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1496 } 1497 } 1498 1499 *A = 0; 1500 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1501 PLogObjectCreate(B); 1502 B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1503 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1504 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1505 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1506 if (!flg) { 1507 switch (bs) { 1508 case 1: 1509 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1510 B->ops->solve = MatSolve_SeqBAIJ_1; 1511 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1512 B->ops->mult = MatMult_SeqBAIJ_1; 1513 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1514 break; 1515 case 2: 1516 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1517 B->ops->solve = MatSolve_SeqBAIJ_2; 1518 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1519 B->ops->mult = MatMult_SeqBAIJ_2; 1520 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1521 break; 1522 case 3: 1523 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1524 B->ops->solve = MatSolve_SeqBAIJ_3; 1525 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1526 B->ops->mult = MatMult_SeqBAIJ_3; 1527 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1528 break; 1529 case 4: 1530 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1531 B->ops->solve = MatSolve_SeqBAIJ_4; 1532 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1533 B->ops->mult = MatMult_SeqBAIJ_4; 1534 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1535 break; 1536 case 5: 1537 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1538 B->ops->solve = MatSolve_SeqBAIJ_5; 1539 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1540 B->ops->mult = MatMult_SeqBAIJ_5; 1541 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1542 break; 1543 case 6: 1544 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1545 B->ops->solve = MatSolve_SeqBAIJ_6; 1546 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1547 B->ops->mult = MatMult_SeqBAIJ_6; 1548 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1549 break; 1550 case 7: 1551 B->ops->mult = MatMult_SeqBAIJ_7; 1552 B->ops->solve = MatSolve_SeqBAIJ_7; 1553 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 1554 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1555 break; 1556 } 1557 } 1558 B->ops->destroy = MatDestroy_SeqBAIJ; 1559 B->ops->view = MatView_SeqBAIJ; 1560 B->factor = 0; 1561 B->lupivotthreshold = 1.0; 1562 B->mapping = 0; 1563 b->row = 0; 1564 b->col = 0; 1565 b->icol = 0; 1566 b->reallocs = 0; 1567 b->saved_values = 0; 1568 1569 b->m = m; B->m = m; B->M = m; 1570 b->n = n; B->n = n; B->N = n; 1571 1572 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1573 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1574 1575 b->mbs = mbs; 1576 b->nbs = nbs; 1577 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1578 if (!nnz) { 1579 if (nz == PETSC_DEFAULT) nz = 5; 1580 else if (nz <= 0) nz = 1; 1581 for (i=0; i<mbs; i++) b->imax[i] = nz; 1582 nz = nz*mbs; 1583 } else { 1584 nz = 0; 1585 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1586 } 1587 1588 /* allocate the matrix space */ 1589 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1590 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1591 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1592 b->j = (int*)(b->a + nz*bs2); 1593 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1594 b->i = b->j + nz; 1595 b->singlemalloc = PETSC_TRUE; 1596 1597 b->i[0] = 0; 1598 for (i=1; i<mbs+1; i++) { 1599 b->i[i] = b->i[i-1] + b->imax[i-1]; 1600 } 1601 1602 /* b->ilen will count nonzeros in each block row so far. */ 1603 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1604 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1605 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1606 1607 b->bs = bs; 1608 b->bs2 = bs2; 1609 b->mbs = mbs; 1610 b->nz = 0; 1611 b->maxnz = nz*bs2; 1612 b->sorted = PETSC_FALSE; 1613 b->roworiented = PETSC_TRUE; 1614 b->nonew = 0; 1615 b->diag = 0; 1616 b->solve_work = 0; 1617 b->mult_work = 0; 1618 b->spptr = 0; 1619 B->info.nz_unneeded = (PetscReal)b->maxnz; 1620 b->keepzeroedrows = PETSC_FALSE; 1621 1622 *A = B; 1623 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1624 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1625 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1627 "MatStoreValues_SeqBAIJ", 1628 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1630 "MatRetrieveValues_SeqBAIJ", 1631 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1633 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1634 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNC__ 1639 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1640 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1641 { 1642 Mat C; 1643 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1644 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1645 1646 PetscFunctionBegin; 1647 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1648 1649 *B = 0; 1650 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1651 PLogObjectCreate(C); 1652 C->data = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1653 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1654 C->ops->destroy = MatDestroy_SeqBAIJ; 1655 C->ops->view = MatView_SeqBAIJ; 1656 C->factor = A->factor; 1657 c->row = 0; 1658 c->col = 0; 1659 c->icol = 0; 1660 c->saved_values = 0; 1661 c->keepzeroedrows = a->keepzeroedrows; 1662 C->assembled = PETSC_TRUE; 1663 1664 c->m = C->m = a->m; 1665 c->n = C->n = a->n; 1666 C->M = a->m; 1667 C->N = a->n; 1668 1669 c->bs = a->bs; 1670 c->bs2 = a->bs2; 1671 c->mbs = a->mbs; 1672 c->nbs = a->nbs; 1673 1674 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1675 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1676 for (i=0; i<mbs; i++) { 1677 c->imax[i] = a->imax[i]; 1678 c->ilen[i] = a->ilen[i]; 1679 } 1680 1681 /* allocate the matrix space */ 1682 c->singlemalloc = PETSC_TRUE; 1683 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1684 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1685 c->j = (int*)(c->a + nz*bs2); 1686 c->i = c->j + nz; 1687 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1688 if (mbs > 0) { 1689 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1690 if (cpvalues == MAT_COPY_VALUES) { 1691 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1692 } else { 1693 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1694 } 1695 } 1696 1697 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1698 c->sorted = a->sorted; 1699 c->roworiented = a->roworiented; 1700 c->nonew = a->nonew; 1701 1702 if (a->diag) { 1703 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1704 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1705 for (i=0; i<mbs; i++) { 1706 c->diag[i] = a->diag[i]; 1707 } 1708 } else c->diag = 0; 1709 c->nz = a->nz; 1710 c->maxnz = a->maxnz; 1711 c->solve_work = 0; 1712 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1713 c->mult_work = 0; 1714 *B = C; 1715 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNC__ 1720 #define __FUNC__ "MatLoad_SeqBAIJ" 1721 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1722 { 1723 Mat_SeqBAIJ *a; 1724 Mat B; 1725 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1726 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1727 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1728 int *masked,nmask,tmp,bs2,ishift; 1729 Scalar *aa; 1730 MPI_Comm comm = ((PetscObject)viewer)->comm; 1731 1732 PetscFunctionBegin; 1733 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1734 bs2 = bs*bs; 1735 1736 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1737 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1738 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1740 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1741 M = header[1]; N = header[2]; nz = header[3]; 1742 1743 if (header[3] < 0) { 1744 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1745 } 1746 1747 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1748 1749 /* 1750 This code adds extra rows to make sure the number of rows is 1751 divisible by the blocksize 1752 */ 1753 mbs = M/bs; 1754 extra_rows = bs - M + bs*(mbs); 1755 if (extra_rows == bs) extra_rows = 0; 1756 else mbs++; 1757 if (extra_rows) { 1758 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1759 } 1760 1761 /* read in row lengths */ 1762 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1763 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1764 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1765 1766 /* read in column indices */ 1767 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1768 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1769 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1770 1771 /* loop over row lengths determining block row lengths */ 1772 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1773 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1774 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1775 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1776 masked = mask + mbs; 1777 rowcount = 0; nzcount = 0; 1778 for (i=0; i<mbs; i++) { 1779 nmask = 0; 1780 for (j=0; j<bs; j++) { 1781 kmax = rowlengths[rowcount]; 1782 for (k=0; k<kmax; k++) { 1783 tmp = jj[nzcount++]/bs; 1784 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1785 } 1786 rowcount++; 1787 } 1788 browlengths[i] += nmask; 1789 /* zero out the mask elements we set */ 1790 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1791 } 1792 1793 /* create our matrix */ 1794 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1795 B = *A; 1796 a = (Mat_SeqBAIJ*)B->data; 1797 1798 /* set matrix "i" values */ 1799 a->i[0] = 0; 1800 for (i=1; i<= mbs; i++) { 1801 a->i[i] = a->i[i-1] + browlengths[i-1]; 1802 a->ilen[i-1] = browlengths[i-1]; 1803 } 1804 a->nz = 0; 1805 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1806 1807 /* read in nonzero values */ 1808 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1809 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1810 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1811 1812 /* set "a" and "j" values into matrix */ 1813 nzcount = 0; jcount = 0; 1814 for (i=0; i<mbs; i++) { 1815 nzcountb = nzcount; 1816 nmask = 0; 1817 for (j=0; j<bs; j++) { 1818 kmax = rowlengths[i*bs+j]; 1819 for (k=0; k<kmax; k++) { 1820 tmp = jj[nzcount++]/bs; 1821 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1822 } 1823 rowcount++; 1824 } 1825 /* sort the masked values */ 1826 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1827 1828 /* set "j" values into matrix */ 1829 maskcount = 1; 1830 for (j=0; j<nmask; j++) { 1831 a->j[jcount++] = masked[j]; 1832 mask[masked[j]] = maskcount++; 1833 } 1834 /* set "a" values into matrix */ 1835 ishift = bs2*a->i[i]; 1836 for (j=0; j<bs; j++) { 1837 kmax = rowlengths[i*bs+j]; 1838 for (k=0; k<kmax; k++) { 1839 tmp = jj[nzcountb]/bs ; 1840 block = mask[tmp] - 1; 1841 point = jj[nzcountb] - bs*tmp; 1842 idx = ishift + bs2*block + j + bs*point; 1843 a->a[idx] = aa[nzcountb++]; 1844 } 1845 } 1846 /* zero out the mask elements we set */ 1847 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1848 } 1849 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1850 1851 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1852 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1853 ierr = PetscFree(aa);CHKERRQ(ierr); 1854 ierr = PetscFree(jj);CHKERRQ(ierr); 1855 ierr = PetscFree(mask);CHKERRQ(ierr); 1856 1857 B->assembled = PETSC_TRUE; 1858 1859 ierr = MatView_Private(B);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 1864 1865